diff --git a/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java b/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java index 10259c54..bed9e137 100644 --- a/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java +++ b/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java @@ -209,9 +209,9 @@ public enum EpisodeMetrics implements SimilarityMetric { @Override public float getSimilarity(Object o1, Object o2) { - // normalize absolute similarity to similarity rank (5 ranks in total), + // normalize absolute similarity to similarity rank (4 ranks in total), // so we are less likely to fall for false positives in this pass, and move on to the next one - return (float) (floor(super.getSimilarity(o1, o2) * 5) / 5); + return (float) (floor(super.getSimilarity(o1, o2) * 4) / 4); } @@ -222,6 +222,15 @@ public enum EpisodeMetrics implements SimilarityMetric { } }), + NumericSequence(new SequenceMatchSimilarity() { + + @Override + protected String normalize(Object object) { + // simplify file name, if possible + return normalizeObject(object).replaceAll("\\D+", " ").trim(); + } + }), + // Match by generic numeric similarity Numeric(new NumericSimilarityMetric() { @@ -402,9 +411,9 @@ public enum EpisodeMetrics implements SimilarityMetric { // 7 pass: prefer episodes that were aired closer to the last modified date of the file // 8 pass: resolve remaining collisions via absolute string similarity if (includeFileMetrics) { - return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() }; + return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() }; } else { - return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() }; + return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() }; } } diff --git a/source/net/sourceforge/filebot/similarity/NumericSimilarityMetric.java b/source/net/sourceforge/filebot/similarity/NumericSimilarityMetric.java index 4372611a..a528e521 100644 --- a/source/net/sourceforge/filebot/similarity/NumericSimilarityMetric.java +++ b/source/net/sourceforge/filebot/similarity/NumericSimilarityMetric.java @@ -3,7 +3,7 @@ package net.sourceforge.filebot.similarity; import java.util.ArrayList; -import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.Scanner; import java.util.Set; @@ -18,31 +18,31 @@ public class NumericSimilarityMetric implements SimilarityMetric { private final AbstractStringMetric metric; - + public NumericSimilarityMetric() { // I don't exactly know why, but I get a good matching behavior // when using QGramsDistance or BlockDistance metric = new QGramsDistance(new NumberTokeniser()); } - + @Override public float getSimilarity(Object o1, Object o2) { return metric.getSimilarity(normalize(o1), normalize(o2)); } - + protected String normalize(Object object) { // no need to do anything special here, because we don't care about anything but number patterns anyway return object.toString(); } - + private static class NumberTokeniser implements InterfaceTokeniser { private final String delimiter = "\\D+"; - + @Override public ArrayList tokenizeToArrayList(String input) { ArrayList tokens = new ArrayList(); @@ -58,34 +58,33 @@ public class NumericSimilarityMetric implements SimilarityMetric { return tokens; } - + @Override public Set tokenizeToSet(String input) { - return new HashSet(tokenizeToArrayList(input)); + return new LinkedHashSet(tokenizeToArrayList(input)); } - + @Override public String getShortDescriptionString() { return getClass().getSimpleName(); } - + @Override public String getDelimiters() { return delimiter; } - private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler(); - + @Override public InterfaceTermHandler getStopWordHandler() { return stopWordHandler; } - + @Override public void setStopWordHandler(InterfaceTermHandler stopWordHandler) { this.stopWordHandler = stopWordHandler; diff --git a/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java b/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java index 30b13d46..aed9e237 100644 --- a/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java +++ b/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java @@ -29,7 +29,7 @@ public class SeasonEpisodeMatcher { patterns[0] = new SeasonEpisodePattern(null, "(? process(MatchResult match) {