* support S0EP00 pattern
* added a numeric-sequence matching step that differentiates candidates, improving support for generic season/episode patterns
This commit is contained in:
parent
12b277dacc
commit
e631641a0c
|
@ -209,9 +209,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
// normalize absolute similarity to similarity rank (5 ranks in total),
|
// normalize absolute similarity to similarity rank (4 ranks in total),
|
||||||
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
||||||
return (float) (floor(super.getSimilarity(o1, o2) * 5) / 5);
|
return (float) (floor(super.getSimilarity(o1, o2) * 4) / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -222,6 +222,15 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
|
NumericSequence(new SequenceMatchSimilarity() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String normalize(Object object) {
|
||||||
|
// keep only the digit sequences: collapse each run of non-digits into a single space and trim
|
||||||
|
return normalizeObject(object).replaceAll("\\D+", " ").trim();
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
|
||||||
// Match by generic numeric similarity
|
// Match by generic numeric similarity
|
||||||
Numeric(new NumericSimilarityMetric() {
|
Numeric(new NumericSimilarityMetric() {
|
||||||
|
|
||||||
|
@ -402,9 +411,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
||||||
// 8 pass: resolve remaining collisions via absolute string similarity
|
// 8 pass: resolve remaining collisions via absolute string similarity
|
||||||
if (includeFileMetrics) {
|
if (includeFileMetrics) {
|
||||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() };
|
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
||||||
} else {
|
} else {
|
||||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, Name, TimeStamp, new NameSimilarityMetric() };
|
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
@ -18,31 +18,31 @@ public class NumericSimilarityMetric implements SimilarityMetric {
|
||||||
|
|
||||||
private final AbstractStringMetric metric;
|
private final AbstractStringMetric metric;
|
||||||
|
|
||||||
|
|
||||||
public NumericSimilarityMetric() {
|
public NumericSimilarityMetric() {
|
||||||
// I don't exactly know why, but I get a good matching behavior
|
// I don't exactly know why, but I get a good matching behavior
|
||||||
// when using QGramsDistance or BlockDistance
|
// when using QGramsDistance or BlockDistance
|
||||||
metric = new QGramsDistance(new NumberTokeniser());
|
metric = new QGramsDistance(new NumberTokeniser());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
return metric.getSimilarity(normalize(o1), normalize(o2));
|
return metric.getSimilarity(normalize(o1), normalize(o2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected String normalize(Object object) {
|
protected String normalize(Object object) {
|
||||||
// no need to do anything special here, because we don't care about anything but number patterns anyway
|
// no need to do anything special here, because we don't care about anything but number patterns anyway
|
||||||
return object.toString();
|
return object.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static class NumberTokeniser implements InterfaceTokeniser {
|
private static class NumberTokeniser implements InterfaceTokeniser {
|
||||||
|
|
||||||
private final String delimiter = "\\D+";
|
private final String delimiter = "\\D+";
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ArrayList<String> tokenizeToArrayList(String input) {
|
public ArrayList<String> tokenizeToArrayList(String input) {
|
||||||
ArrayList<String> tokens = new ArrayList<String>();
|
ArrayList<String> tokens = new ArrayList<String>();
|
||||||
|
@ -58,34 +58,33 @@ public class NumericSimilarityMetric implements SimilarityMetric {
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Set<String> tokenizeToSet(String input) {
|
public Set<String> tokenizeToSet(String input) {
|
||||||
return new HashSet<String>(tokenizeToArrayList(input));
|
return new LinkedHashSet<String>(tokenizeToArrayList(input));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getShortDescriptionString() {
|
public String getShortDescriptionString() {
|
||||||
return getClass().getSimpleName();
|
return getClass().getSimpleName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getDelimiters() {
|
public String getDelimiters() {
|
||||||
return delimiter;
|
return delimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();
|
private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InterfaceTermHandler getStopWordHandler() {
|
public InterfaceTermHandler getStopWordHandler() {
|
||||||
return stopWordHandler;
|
return stopWordHandler;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setStopWordHandler(InterfaceTermHandler stopWordHandler) {
|
public void setStopWordHandler(InterfaceTermHandler stopWordHandler) {
|
||||||
this.stopWordHandler = stopWordHandler;
|
this.stopWordHandler = stopWordHandler;
|
||||||
|
|
|
@ -29,7 +29,7 @@ public class SeasonEpisodeMatcher {
|
||||||
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
|
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
|
||||||
|
|
||||||
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
|
// match patterns like S01E01, s01e02, s01ep02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
|
||||||
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](((?<=[^._ ])[Ee]?\\d{1,3}(\\D|$))+)") {
|
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee][Pp]?(((?<=[^._ ])[Ee]?[Pp]?\\d{1,3}(\\D|$))+)") {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Collection<SxE> process(MatchResult match) {
|
protected Collection<SxE> process(MatchResult match) {
|
||||||
|
|
Loading…
Reference in New Issue