* support S0EP00 pattern

* added numeric sequence match differentiation step to improve support for generic season/episode patterns
This commit is contained in:
Reinhard Pointner 2013-02-01 08:12:15 +00:00
parent 12b277dacc
commit e631641a0c
3 changed files with 26 additions and 18 deletions

View File

@ -209,9 +209,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
@Override @Override
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
// normalize absolute similarity to similarity rank (5 ranks in total), // normalize absolute similarity to similarity rank (4 ranks in total),
// so we are less likely to fall for false positives in this pass, and move on to the next one // so we are less likely to fall for false positives in this pass, and move on to the next one
return (float) (floor(super.getSimilarity(o1, o2) * 5) / 5); return (float) (floor(super.getSimilarity(o1, o2) * 4) / 4);
} }
@ -222,6 +222,15 @@ public enum EpisodeMetrics implements SimilarityMetric {
} }
}), }),
// Match by the sequence of numbers found in the name, ignoring all non-digit text
NumericSequence(new SequenceMatchSimilarity() {
@Override
protected String normalize(Object object) {
// reduce the name to its digit groups: every run of non-digit characters
// is replaced by a single space, then leading/trailing spaces are trimmed
// NOTE(review): normalizeObject is defined outside this view; presumably it returns a cleaned-up name string -- confirm
return normalizeObject(object).replaceAll("\\D+", " ").trim();
}
}),
// Match by generic numeric similarity // Match by generic numeric similarity
Numeric(new NumericSimilarityMetric() { Numeric(new NumericSimilarityMetric() {
@ -402,9 +411,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
// 7 pass: prefer episodes that were aired closer to the last modified date of the file // 7 pass: prefer episodes that were aired closer to the last modified date of the file
// 8 pass: resolve remaining collisions via absolute string similarity // 8 pass: resolve remaining collisions via absolute string similarity
if (includeFileMetrics) { if (includeFileMetrics) {
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() }; return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
} else { } else {
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() }; return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
} }
} }

View File

@ -3,7 +3,7 @@ package net.sourceforge.filebot.similarity;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.LinkedHashSet;
import java.util.Scanner; import java.util.Scanner;
import java.util.Set; import java.util.Set;
@ -61,7 +61,7 @@ public class NumericSimilarityMetric implements SimilarityMetric {
@Override @Override
public Set<String> tokenizeToSet(String input) { public Set<String> tokenizeToSet(String input) {
return new HashSet<String>(tokenizeToArrayList(input)); return new LinkedHashSet<String>(tokenizeToArrayList(input));
} }
@ -76,7 +76,6 @@ public class NumericSimilarityMetric implements SimilarityMetric {
return delimiter; return delimiter;
} }
private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler(); private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();

View File

@ -29,7 +29,7 @@ public class SeasonEpisodeMatcher {
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})"); patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ... // match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](((?<=[^._ ])[Ee]?\\d{1,3}(\\D|$))+)") { patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee][Pp]?(((?<=[^._ ])[Ee]?[Pp]?\\d{1,3}(\\D|$))+)") {
@Override @Override
protected Collection<SxE> process(MatchResult match) { protected Collection<SxE> process(MatchResult match) {