* support S0EP00 pattern

* added numeric sequence match differentiation step to improve support for generic season/episode patterns
This commit is contained in:
Reinhard Pointner 2013-02-01 08:12:15 +00:00
parent 12b277dacc
commit e631641a0c
3 changed files with 26 additions and 18 deletions

View File

@ -209,9 +209,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
@Override @Override
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
// normalize absolute similarity to similarity rank (5 ranks in total), // normalize absolute similarity to similarity rank (4 ranks in total),
// so we are less likely to fall for false positives in this pass, and move on to the next one // so we are less likely to fall for false positives in this pass, and move on to the next one
return (float) (floor(super.getSimilarity(o1, o2) * 5) / 5); return (float) (floor(super.getSimilarity(o1, o2) * 4) / 4);
} }
@ -222,6 +222,15 @@ public enum EpisodeMetrics implements SimilarityMetric {
} }
}), }),
// Match by the sequence of numbers contained in the name, ignoring everything that is not a digit
NumericSequence(new SequenceMatchSimilarity() {
@Override
protected String normalize(Object object) {
// collapse every run of non-digit characters into a single space and trim the ends,
// so only the numbers remain, in their original order, separated by single spaces
// NOTE(review): normalizeObject is defined outside this hunk; presumably it yields the simplified name string -- confirm
return normalizeObject(object).replaceAll("\\D+", " ").trim();
}
}),
// Match by generic numeric similarity // Match by generic numeric similarity
Numeric(new NumericSimilarityMetric() { Numeric(new NumericSimilarityMetric() {
@ -402,9 +411,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
// 7 pass: prefer episodes that were aired closer to the last modified date of the file // 7 pass: prefer episodes that were aired closer to the last modified date of the file
// 8 pass: resolve remaining collisions via absolute string similarity // 8 pass: resolve remaining collisions via absolute string similarity
if (includeFileMetrics) { if (includeFileMetrics) {
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() }; return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
} else { } else {
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() }; return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
} }
} }

View File

@ -3,7 +3,7 @@ package net.sourceforge.filebot.similarity;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.LinkedHashSet;
import java.util.Scanner; import java.util.Scanner;
import java.util.Set; import java.util.Set;
@ -18,31 +18,31 @@ public class NumericSimilarityMetric implements SimilarityMetric {
private final AbstractStringMetric metric; private final AbstractStringMetric metric;
public NumericSimilarityMetric() { public NumericSimilarityMetric() {
// I don't exactly know why, but I get a good matching behavior // I don't exactly know why, but I get a good matching behavior
// when using QGramsDistance or BlockDistance // when using QGramsDistance or BlockDistance
metric = new QGramsDistance(new NumberTokeniser()); metric = new QGramsDistance(new NumberTokeniser());
} }
@Override @Override
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
return metric.getSimilarity(normalize(o1), normalize(o2)); return metric.getSimilarity(normalize(o1), normalize(o2));
} }
protected String normalize(Object object) { protected String normalize(Object object) {
// no need to do anything special here, because we don't care about anything but number patterns anyway // no need to do anything special here, because we don't care about anything but number patterns anyway
return object.toString(); return object.toString();
} }
private static class NumberTokeniser implements InterfaceTokeniser { private static class NumberTokeniser implements InterfaceTokeniser {
private final String delimiter = "\\D+"; private final String delimiter = "\\D+";
@Override @Override
public ArrayList<String> tokenizeToArrayList(String input) { public ArrayList<String> tokenizeToArrayList(String input) {
ArrayList<String> tokens = new ArrayList<String>(); ArrayList<String> tokens = new ArrayList<String>();
@ -58,34 +58,33 @@ public class NumericSimilarityMetric implements SimilarityMetric {
return tokens; return tokens;
} }
@Override @Override
public Set<String> tokenizeToSet(String input) { public Set<String> tokenizeToSet(String input) {
return new HashSet<String>(tokenizeToArrayList(input)); return new LinkedHashSet<String>(tokenizeToArrayList(input));
} }
@Override @Override
public String getShortDescriptionString() { public String getShortDescriptionString() {
return getClass().getSimpleName(); return getClass().getSimpleName();
} }
@Override @Override
public String getDelimiters() { public String getDelimiters() {
return delimiter; return delimiter;
} }
private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler(); private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();
@Override @Override
public InterfaceTermHandler getStopWordHandler() { public InterfaceTermHandler getStopWordHandler() {
return stopWordHandler; return stopWordHandler;
} }
@Override @Override
public void setStopWordHandler(InterfaceTermHandler stopWordHandler) { public void setStopWordHandler(InterfaceTermHandler stopWordHandler) {
this.stopWordHandler = stopWordHandler; this.stopWordHandler = stopWordHandler;

View File

@ -29,7 +29,7 @@ public class SeasonEpisodeMatcher {
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})"); patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ... // match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](((?<=[^._ ])[Ee]?\\d{1,3}(\\D|$))+)") { patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee][Pp]?(((?<=[^._ ])[Ee]?[Pp]?\\d{1,3}(\\D|$))+)") {
@Override @Override
protected Collection<SxE> process(MatchResult match) { protected Collection<SxE> process(MatchResult match) {