* support S0EP00 pattern
* added numeric sequence match differentiation step to improve support for generic season/episode patterns
This commit is contained in:
parent
12b277dacc
commit
e631641a0c
|
@ -209,9 +209,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
|
||||
@Override
|
||||
public float getSimilarity(Object o1, Object o2) {
|
||||
// normalize absolute similarity to similarity rank (5 ranks in total),
|
||||
// normalize absolute similarity to similarity rank (4 ranks in total),
|
||||
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
||||
return (float) (floor(super.getSimilarity(o1, o2) * 5) / 5);
|
||||
return (float) (floor(super.getSimilarity(o1, o2) * 4) / 4);
|
||||
}
|
||||
|
||||
|
||||
|
@ -222,6 +222,15 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
}
|
||||
}),
|
||||
|
||||
NumericSequence(new SequenceMatchSimilarity() {
|
||||
|
||||
@Override
|
||||
protected String normalize(Object object) {
|
||||
// simplify file name, if possible, then keep only the digit sequences (each run of non-digits collapses to a single space)
|
||||
return normalizeObject(object).replaceAll("\\D+", " ").trim();
|
||||
}
|
||||
}),
|
||||
|
||||
// Match by generic numeric similarity
|
||||
Numeric(new NumericSimilarityMetric() {
|
||||
|
||||
|
@ -402,9 +411,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
||||
// 8 pass: resolve remaining collisions via absolute string similarity
|
||||
if (includeFileMetrics) {
|
||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() };
|
||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
||||
} else {
|
||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() };
|
||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ package net.sourceforge.filebot.similarity;
|
|||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -18,31 +18,31 @@ public class NumericSimilarityMetric implements SimilarityMetric {
|
|||
|
||||
private final AbstractStringMetric metric;
|
||||
|
||||
|
||||
|
||||
public NumericSimilarityMetric() {
|
||||
// I don't know exactly why, but I get good matching behavior
|
||||
// when using QGramsDistance or BlockDistance
|
||||
metric = new QGramsDistance(new NumberTokeniser());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public float getSimilarity(Object o1, Object o2) {
|
||||
return metric.getSimilarity(normalize(o1), normalize(o2));
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected String normalize(Object object) {
|
||||
// no need to do anything special here, because we don't care about anything but number patterns anyway
|
||||
return object.toString();
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static class NumberTokeniser implements InterfaceTokeniser {
|
||||
|
||||
private final String delimiter = "\\D+";
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public ArrayList<String> tokenizeToArrayList(String input) {
|
||||
ArrayList<String> tokens = new ArrayList<String>();
|
||||
|
@ -58,34 +58,33 @@ public class NumericSimilarityMetric implements SimilarityMetric {
|
|||
return tokens;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public Set<String> tokenizeToSet(String input) {
|
||||
return new HashSet<String>(tokenizeToArrayList(input));
|
||||
return new LinkedHashSet<String>(tokenizeToArrayList(input));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public String getShortDescriptionString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public String getDelimiters() {
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
|
||||
private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public InterfaceTermHandler getStopWordHandler() {
|
||||
return stopWordHandler;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public void setStopWordHandler(InterfaceTermHandler stopWordHandler) {
|
||||
this.stopWordHandler = stopWordHandler;
|
||||
|
|
|
@ -29,7 +29,7 @@ public class SeasonEpisodeMatcher {
|
|||
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
|
||||
|
||||
// match patterns like S01E01, s01e02, S01EP01, s01ep02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
|
||||
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](((?<=[^._ ])[Ee]?\\d{1,3}(\\D|$))+)") {
|
||||
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee][Pp]?(((?<=[^._ ])[Ee]?[Pp]?\\d{1,3}(\\D|$))+)") {
|
||||
|
||||
@Override
|
||||
protected Collection<SxE> process(MatchResult match) {
|
||||
|
|
Loading…
Reference in New Issue