* encapsulate similarity metrics used in RenamePanel properly

* removed some unused code
* added some unit tests
This commit is contained in:
Reinhard Pointner 2009-07-26 16:54:24 +00:00
parent 7dc46efe68
commit 203eedb24e
16 changed files with 238 additions and 214 deletions

View File

@ -28,22 +28,4 @@ public class LengthEqualsMetric implements SimilarityMetric {
return -1; return -1;
} }
@Override
public String getDescription() {
return "Check whether file size is equal or not";
}
@Override
public String getName() {
return "Length";
}
@Override
public String toString() {
return getClass().getName();
}
} }

View File

@ -28,11 +28,11 @@ public class Matcher<V, C> {
private final DisjointMatchCollection<V, C> disjointMatchCollection; private final DisjointMatchCollection<V, C> disjointMatchCollection;
public Matcher(Collection<? extends V> values, Collection<? extends C> candidates, Collection<? extends SimilarityMetric> metrics) { public Matcher(Collection<? extends V> values, Collection<? extends C> candidates, SimilarityMetric[] metrics) {
this.values = new LinkedList<V>(values); this.values = new LinkedList<V>(values);
this.candidates = new LinkedList<C>(candidates); this.candidates = new LinkedList<C>(candidates);
this.metrics = metrics.toArray(new SimilarityMetric[0]); this.metrics = metrics.clone();
this.disjointMatchCollection = new DisjointMatchCollection<V, C>(); this.disjointMatchCollection = new DisjointMatchCollection<V, C>();
} }

View File

@ -2,7 +2,6 @@
package net.sourceforge.filebot.similarity; package net.sourceforge.filebot.similarity;
import static net.sourceforge.filebot.FileBotUtilities.*;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3; import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3;
@ -26,8 +25,8 @@ public class NameSimilarityMetric implements SimilarityMetric {
protected String normalize(Object object) { protected String normalize(Object object) {
// remove embedded checksum from name, if any // use string representation
String name = removeEmbeddedChecksum(object.toString()); String name = object.toString();
// normalize separators // normalize separators
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " "); name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
@ -36,22 +35,4 @@ public class NameSimilarityMetric implements SimilarityMetric {
return name.trim().toLowerCase(); return name.trim().toLowerCase();
} }
@Override
public String getDescription() {
return "Similarity of names";
}
@Override
public String getName() {
return metric.getShortDescriptionString();
}
@Override
public String toString() {
return getClass().getName();
}
} }

View File

@ -2,8 +2,6 @@
package net.sourceforge.filebot.similarity; package net.sourceforge.filebot.similarity;
import static net.sourceforge.filebot.FileBotUtilities.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
import java.util.Scanner; import java.util.Scanner;
@ -20,9 +18,9 @@ public class NumericSimilarityMetric implements SimilarityMetric {
private final AbstractStringMetric metric; private final AbstractStringMetric metric;
public NumericSimilarityMetric() { public NumericSimilarityMetric() {
// I don't really know why, but I get a good matching behavior // I don't exactly know why, but I get a good matching behavior
// when using QGramsDistance or BlockDistance // when using QGramsDistance or BlockDistance
metric = new QGramsDistance(new NumberTokeniser()); metric = new QGramsDistance(new NumberTokeniser());
} }
@ -35,42 +33,22 @@ public class NumericSimilarityMetric implements SimilarityMetric {
protected String normalize(Object object) { protected String normalize(Object object) {
// delete checksum pattern, because it will mess with the number tokens // no need to do anything special here, because we don't care about anything but number patterns anyway
return removeEmbeddedChecksum(object.toString()); return object.toString();
} }
@Override private static class NumberTokeniser implements InterfaceTokeniser {
public String getDescription() {
return "Similarity of number patterns";
}
@Override
public String getName() {
return "Numbers";
}
@Override
public String toString() {
return getClass().getName();
}
protected static class NumberTokeniser implements InterfaceTokeniser {
private final String delimiter = "\\D+"; private final String delimiter = "\\D+";
@Override @Override
public ArrayList<String> tokenizeToArrayList(String input) { public ArrayList<String> tokenizeToArrayList(String input) {
ArrayList<String> tokens = new ArrayList<String>(); ArrayList<String> tokens = new ArrayList<String>();
Scanner scanner = new Scanner(input);
// scan for number patterns, use non-number pattern as delimiter // scan for number patterns, use non-number pattern as delimiter
scanner.useDelimiter(delimiter); Scanner scanner = new Scanner(input).useDelimiter(delimiter);
while (scanner.hasNextInt()) { while (scanner.hasNextInt()) {
// remove leading zeros from number tokens by scanning for Integers // remove leading zeros from number tokens by scanning for Integers
@ -98,9 +76,10 @@ public class NumericSimilarityMetric implements SimilarityMetric {
return delimiter; return delimiter;
} }
private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler(); private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();
@Override @Override
public InterfaceTermHandler getStopWordHandler() { public InterfaceTermHandler getStopWordHandler() {
return stopWordHandler; return stopWordHandler;

View File

@ -12,7 +12,7 @@ public class SeasonEpisodeSimilarityMetric implements SimilarityMetric {
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(); private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher();
@Override @Override
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
Collection<SxE> sxeVector1 = parse(o1); Collection<SxE> sxeVector1 = parse(o1);
@ -52,22 +52,4 @@ public class SeasonEpisodeSimilarityMetric implements SimilarityMetric {
return seasonEpisodeMatcher.match(object.toString()); return seasonEpisodeMatcher.match(object.toString());
} }
@Override
public String getDescription() {
return "Similarity of season and episode numbers";
}
@Override
public String getName() {
return "Season and Episode";
}
@Override
public String toString() {
return getClass().getName();
}
} }

View File

@ -6,10 +6,4 @@ public interface SimilarityMetric {
public float getSimilarity(Object o1, Object o2); public float getSimilarity(Object o1, Object o2);
public String getDescription();
public String getName();
} }

View File

@ -45,13 +45,13 @@ class AutoFetchEpisodeListMatcher extends SwingWorker<List<Match<File, Episode>>
private final List<File> files; private final List<File> files;
private final List<SimilarityMetric> metrics; private final SimilarityMetric[] metrics;
public AutoFetchEpisodeListMatcher(EpisodeListProvider provider, Collection<File> files, Collection<SimilarityMetric> metrics) { public AutoFetchEpisodeListMatcher(EpisodeListProvider provider, Collection<File> files, SimilarityMetric[] metrics) {
this.provider = provider; this.provider = provider;
this.files = new LinkedList<File>(files); this.files = new LinkedList<File>(files);
this.metrics = new ArrayList<SimilarityMetric>(metrics); this.metrics = metrics.clone();
} }

View File

@ -7,9 +7,6 @@ import java.awt.Window;
import java.awt.event.ActionEvent; import java.awt.event.ActionEvent;
import java.beans.PropertyChangeEvent; import java.beans.PropertyChangeEvent;
import java.io.File; import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeoutException;
@ -23,16 +20,9 @@ import javax.swing.SwingUtilities;
import javax.swing.SwingWorker; import javax.swing.SwingWorker;
import net.sourceforge.filebot.ResourceManager; import net.sourceforge.filebot.ResourceManager;
import net.sourceforge.filebot.similarity.LengthEqualsMetric;
import net.sourceforge.filebot.similarity.Match; import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher; import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.NumericSimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeSimilarityMetric;
import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.web.Episode;
import net.sourceforge.tuned.FileUtilities;
import net.sourceforge.tuned.ui.ProgressDialog; import net.sourceforge.tuned.ui.ProgressDialog;
import net.sourceforge.tuned.ui.SwingWorkerPropertyChangeAdapter; import net.sourceforge.tuned.ui.SwingWorkerPropertyChangeAdapter;
import net.sourceforge.tuned.ui.ProgressDialog.Cancellable; import net.sourceforge.tuned.ui.ProgressDialog.Cancellable;
@ -42,103 +32,16 @@ class MatchAction extends AbstractAction {
private final RenameModel model; private final RenameModel model;
private final Collection<SimilarityMetric> metrics;
public MatchAction(RenameModel model) { public MatchAction(RenameModel model) {
super("Match", ResourceManager.getIcon("action.match"));
this.model = model; this.model = model;
this.metrics = createMetrics();
putValue(NAME, "Match");
putValue(SMALL_ICON, ResourceManager.getIcon("action.match"));
putValue(SHORT_DESCRIPTION, "Match files and names"); putValue(SHORT_DESCRIPTION, "Match files and names");
} }
protected Collection<SimilarityMetric> createMetrics() {
SimilarityMetric[] metrics = new SimilarityMetric[4];
// 1. pass: match by file length (fast, but only works when matching torrents or files)
metrics[0] = new LengthEqualsMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
// order of arguments is logically irrelevant, but we might be able to save us a call to File.length() this way
return o1 instanceof File ? super.getSimilarity(o2, o1) : super.getSimilarity(o1, o2);
}
@Override
protected long getLength(Object object) {
if (object instanceof AbstractFile) {
return ((AbstractFile) object).getLength();
}
return super.getLength(object);
}
};
// 2. pass: match by season / episode numbers
metrics[1] = new SeasonEpisodeSimilarityMetric() {
@Override
protected Collection<SxE> parse(Object o) {
if (o instanceof Episode) {
Episode episode = (Episode) o;
// create SxE from episode
return Collections.singleton(new SxE(episode.getSeason(), episode.getEpisode()));
}
return super.parse(o);
}
};
// 3. pass: match by generic name similarity (slow, but most matches will have been determined in second pass)
metrics[2] = new NameSimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
// normalize absolute similarity to similarity rank (10 ranks in total),
// so we are less likely to fall for false positives in this pass, and move on to the next one
return (float) (Math.floor(super.getSimilarity(o1, o2) * 10) / 10);
}
@Override
protected String normalize(Object object) {
if (object instanceof File) {
// compare to filename without extension
object = FileUtilities.getName((File) object);
}
return super.normalize(object);
}
};
// 4. pass: match by generic numeric similarity
metrics[3] = new NumericSimilarityMetric() {
@Override
protected String normalize(Object object) {
if (object instanceof File) {
// compare to filename without extension
object = FileUtilities.getName((File) object);
}
return super.normalize(object);
}
};
return Arrays.asList(metrics);
}
public Collection<SimilarityMetric> getMetrics() {
return Collections.unmodifiableCollection(metrics);
}
public void actionPerformed(ActionEvent evt) { public void actionPerformed(ActionEvent evt) {
if (model.names().isEmpty() || model.files().isEmpty()) if (model.names().isEmpty() || model.files().isEmpty())
return; return;
@ -147,7 +50,7 @@ class MatchAction extends AbstractAction {
SwingUtilities.getRoot(eventSource).setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); SwingUtilities.getRoot(eventSource).setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
BackgroundMatcher backgroundMatcher = new BackgroundMatcher(model, metrics); BackgroundMatcher backgroundMatcher = new BackgroundMatcher(model, MatchSimilarityMetric.defaultSequence());
backgroundMatcher.execute(); backgroundMatcher.execute();
try { try {
@ -193,7 +96,7 @@ class MatchAction extends AbstractAction {
private final Matcher<Object, File> matcher; private final Matcher<Object, File> matcher;
public BackgroundMatcher(MatchModel<Object, File> model, Collection<SimilarityMetric> metrics) { public BackgroundMatcher(MatchModel<Object, File> model, SimilarityMetric[] metrics) {
// match names against files // match names against files
this.matcher = new Matcher<Object, File>(model.values(), model.candidates(), metrics); this.matcher = new Matcher<Object, File>(model.values(), model.candidates(), metrics);
} }

View File

@ -0,0 +1,124 @@
package net.sourceforge.filebot.ui.panel.rename;
import java.io.File;
import java.util.Collection;
import java.util.Collections;
import net.sourceforge.filebot.FileBotUtilities;
import net.sourceforge.filebot.similarity.LengthEqualsMetric;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.NumericSimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeSimilarityMetric;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.web.Episode;
import net.sourceforge.tuned.FileUtilities;
/**
 * Similarity metrics used for matching in the rename panel. Every constant wraps
 * an inner {@link SimilarityMetric} and simply delegates to it.
 */
enum MatchSimilarityMetric implements SimilarityMetric {

    /** Match by file length (only works when matching torrents or files). */
    Length(new LengthEqualsMetric() {

        @Override
        public float getSimilarity(Object o1, Object o2) {
            // argument order is logically irrelevant, but passing a File second
            // might save us a call to File.length()
            if (o1 instanceof File) {
                return super.getSimilarity(o2, o1);
            }
            return super.getSimilarity(o1, o2);
        }

        @Override
        protected long getLength(Object object) {
            // AbstractFile carries its own length value
            return object instanceof AbstractFile ? ((AbstractFile) object).getLength() : super.getLength(object);
        }
    }),

    /** Match by season / episode numbers. */
    SeasonEpisode(new SeasonEpisodeSimilarityMetric() {

        @Override
        protected Collection<SxE> parse(Object object) {
            if (!(object instanceof Episode)) {
                return super.parse(object);
            }

            // build the SxE directly from the numbers of the episode
            Episode ep = (Episode) object;
            return Collections.singleton(new SxE(ep.getSeason(), ep.getEpisode()));
        }
    }),

    /** Match by generic name similarity. */
    Name(new NameSimilarityMetric() {

        @Override
        public float getSimilarity(Object o1, Object o2) {
            float absolute = super.getSimilarity(o1, o2);

            // round the absolute similarity down to a similarity rank (10 ranks in total),
            // so we are less likely to fall for false positives in this pass, and move on to the next one
            return (float) (Math.floor(absolute * 10) / 10);
        }

        @Override
        protected String normalize(Object object) {
            // simplify file name first, if possible
            String simplified = normalizeFile(object);
            return super.normalize(simplified);
        }
    }),

    /** Match by generic numeric similarity. */
    Numeric(new NumericSimilarityMetric() {

        @Override
        protected String normalize(Object object) {
            // simplify file name first, if possible
            String simplified = normalizeFile(object);
            return super.normalize(simplified);
        }
    });

    /** The wrapped metric that does the actual work. */
    private final SimilarityMetric metric;

    private MatchSimilarityMetric(SimilarityMetric delegate) {
        this.metric = delegate;
    }

    /** Delegates to the wrapped metric. */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        return this.metric.getSimilarity(o1, o2);
    }

    /**
     * Reduces the given object to a simplified name: the string representation by
     * default, or the name without extension for {@link File} and AbstractFile
     * objects, with any embedded checksum removed afterwards.
     */
    protected static String normalizeFile(Object object) {
        // fall back to the plain string representation
        String simplified = object.toString();

        if (object instanceof File) {
            // use name without extension
            simplified = FileUtilities.getName((File) object);
        } else if (object instanceof AbstractFile) {
            AbstractFile file = (AbstractFile) object;
            simplified = FileUtilities.getNameWithoutExtension(file.getName());
        }

        // remove embedded checksum from name, if any
        return FileBotUtilities.removeEmbeddedChecksum(simplified);
    }

    /**
     * Returns a new array holding the metrics in the order of the matching passes:
     * <ol>
     * <li>file length (fast, but only works when matching torrents or files)</li>
     * <li>season / episode numbers</li>
     * <li>generic name similarity (slow, but most matches will have been determined in second pass)</li>
     * <li>generic numeric similarity</li>
     * </ol>
     */
    public static SimilarityMetric[] defaultSequence() {
        SimilarityMetric[] passes = { Length, SeasonEpisode, Name, Numeric };
        return passes;
    }
}

View File

@ -280,7 +280,7 @@ public class RenamePanel extends JComponent {
// clear names list // clear names list
renameModel.values().clear(); renameModel.values().clear();
AutoFetchEpisodeListMatcher worker = new AutoFetchEpisodeListMatcher(provider, renameModel.files(), matchAction.getMetrics()) { AutoFetchEpisodeListMatcher worker = new AutoFetchEpisodeListMatcher(provider, renameModel.files(), MatchSimilarityMetric.defaultSequence()) {
@Override @Override
protected void done() { protected void done() {

View File

@ -8,6 +8,8 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public final class FileUtilities { public final class FileUtilities {
@ -27,6 +29,14 @@ public final class FileUtilities {
} }
/**
* Pattern used for matching file extensions.
*
* e.g. "file.txt" -> match "txt", ".hidden" -> no match
*/
private static final Pattern extension = Pattern.compile("(?<=.[.])\\p{Alnum}+$");
public static String getExtension(File file) { public static String getExtension(File file) {
if (file.isDirectory()) if (file.isDirectory())
return null; return null;
@ -36,13 +46,14 @@ public final class FileUtilities {
public static String getExtension(String name) { public static String getExtension(String name) {
int dotIndex = name.lastIndexOf("."); Matcher matcher = extension.matcher(name);
// .hidden -> no extension, just hidden if (matcher.find()) {
if (dotIndex > 0 && dotIndex < name.length() - 1) { // extension without leading '.'
return name.substring(dotIndex + 1); return matcher.group();
} }
// no extension
return null; return null;
} }
@ -70,10 +81,11 @@ public final class FileUtilities {
public static String getNameWithoutExtension(String name) { public static String getNameWithoutExtension(String name) {
int dotIndex = name.lastIndexOf("."); Matcher matcher = extension.matcher(name);
if (dotIndex > 0) if (matcher.find()) {
return name.substring(0, dotIndex); return name.substring(0, matcher.start() - 1);
}
// no extension, return given name // no extension, return given name
return name; return name;

View File

@ -11,11 +11,12 @@ import net.sourceforge.filebot.hash.VerificationFormatTest;
import net.sourceforge.filebot.similarity.SimilarityTestSuite; import net.sourceforge.filebot.similarity.SimilarityTestSuite;
import net.sourceforge.filebot.subtitle.SubtitleReaderTestSuite; import net.sourceforge.filebot.subtitle.SubtitleReaderTestSuite;
import net.sourceforge.filebot.ui.panel.rename.MatchModelTest; import net.sourceforge.filebot.ui.panel.rename.MatchModelTest;
import net.sourceforge.filebot.ui.panel.rename.MatchSimilarityMetricTest;
import net.sourceforge.filebot.web.WebTestSuite; import net.sourceforge.filebot.web.WebTestSuite;
@RunWith(Suite.class) @RunWith(Suite.class)
@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, SubtitleReaderTestSuite.class }) @SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, MatchSimilarityMetricTest.class, SubtitleReaderTestSuite.class })
public class FileBotTestSuite { public class FileBotTestSuite {
} }

View File

@ -11,7 +11,7 @@ public class NameSimilarityMetricTest {
private static NameSimilarityMetric metric = new NameSimilarityMetric(); private static NameSimilarityMetric metric = new NameSimilarityMetric();
@Test @Test
public void getSimilarity() { public void getSimilarity() {
// normalize separators, lower-case // normalize separators, lower-case
@ -19,9 +19,6 @@ public class NameSimilarityMetricTest {
assertEquals(1, metric.getSimilarity("test s01e02 second", "test_[S01E02]_Second"), 0); assertEquals(1, metric.getSimilarity("test s01e02 second", "test_[S01E02]_Second"), 0);
assertEquals(1, metric.getSimilarity("test s01e03 third", "__test__S01E03__Third__"), 0); assertEquals(1, metric.getSimilarity("test s01e03 third", "__test__S01E03__Third__"), 0);
assertEquals(1, metric.getSimilarity("test s01e04 four", "test s01e04 four"), 0); assertEquals(1, metric.getSimilarity("test s01e04 four", "test s01e04 four"), 0);
// remove checksum
assertEquals(1, metric.getSimilarity("test", "test [EF62DF13]"), 0);
} }
} }

View File

@ -0,0 +1,32 @@
package net.sourceforge.filebot.ui.panel.rename;
import static net.sourceforge.filebot.ui.panel.rename.MatchSimilarityMetric.*;
import static org.junit.Assert.*;
import java.io.File;
import org.junit.Test;
/**
 * Checks that the rename-panel metrics ignore embedded checksums and that
 * file names are simplified before comparison.
 */
public class MatchSimilarityMetricTest {

    @Test
    public void nameIgnoreEmbeddedChecksum() {
        // the trailing checksum must not lower the name similarity
        float similarity = Name.getSimilarity("test", "test [EF62DF13]");

        assertEquals(1, similarity, 0);
    }

    @Test
    public void numericIgnoreEmbeddedChecksum() {
        // digits inside the checksum must not count as number tokens
        float similarity = Numeric.getSimilarity("S01E02", "Season 1, Episode 2 [00A01E02]");

        assertEquals(1, similarity, 0);
    }

    @Test
    public void normalizeFile() {
        File file = new File("/folder/abc[EF62DF13].txt");

        // folder, checksum and extension are all expected to be stripped
        String simplified = MatchSimilarityMetric.normalizeFile(file);

        assertEquals("abc", simplified);
    }
}

View File

@ -0,0 +1,37 @@
package net.sourceforge.tuned;
import static org.junit.Assert.*;
import org.junit.Test;
/**
 * Tests for the extension handling of {@code FileUtilities}: an extension is
 * expected to be a trailing run of alphanumeric characters after a dot that is
 * not the first character of the name.
 */
public class FileUtilitiesTest {

    @Test
    public void getExtension() {
        // regular extensions
        assertEquals("txt", FileUtilities.getExtension("abc.txt"));
        assertEquals("out", FileUtilities.getExtension("a.out"));
        assertEquals("r00", FileUtilities.getExtension("archive.r00"));

        // hidden file or trailing dot: no extension
        assertNull(FileUtilities.getExtension(".hidden"));
        assertNull(FileUtilities.getExtension("a."));

        // non-alphanumeric characters after the dot: no extension
        assertNull(FileUtilities.getExtension("archive.r??"));
        assertNull(FileUtilities.getExtension("archive.invalid extension"));
    }

    @Test
    public void getNameWithoutExtension() {
        // regular extensions are cut off together with the dot
        assertEquals("abc", FileUtilities.getNameWithoutExtension("abc.txt"));
        assertEquals("a", FileUtilities.getNameWithoutExtension("a.out"));
        assertEquals("archive", FileUtilities.getNameWithoutExtension("archive.r00"));

        // names without a valid extension are returned unchanged
        assertEquals(".hidden", FileUtilities.getNameWithoutExtension(".hidden"));
        assertEquals("a.", FileUtilities.getNameWithoutExtension("a."));
        assertEquals("archive.r??", FileUtilities.getNameWithoutExtension("archive.r??"));
        assertEquals("archive.invalid extension", FileUtilities.getNameWithoutExtension("archive.invalid extension"));
    }
}

View File

@ -8,7 +8,7 @@ import org.junit.runners.Suite.SuiteClasses;
@RunWith(Suite.class) @RunWith(Suite.class)
@SuiteClasses( { ByteBufferOutputStreamTest.class, PreferencesMapTest.class, PreferencesListTest.class, TreeIteratorTest.class, FilterIteratorTest.class }) @SuiteClasses( { FileUtilitiesTest.class, ByteBufferOutputStreamTest.class, PreferencesMapTest.class, PreferencesListTest.class, TreeIteratorTest.class, FilterIteratorTest.class })
public class TunedTestSuite { public class TunedTestSuite {
} }