* improve series name detection by only focusing on the common word sequence before the SxE pattern
This commit is contained in:
parent
5530bc47f5
commit
b2e092c697
|
@ -15,11 +15,11 @@ File.metaClass.getDir = { getParentFile() }
|
||||||
File.metaClass.hasFile = { c -> isDirectory() && listFiles().find(c) }
|
File.metaClass.hasFile = { c -> isDirectory() && listFiles().find(c) }
|
||||||
|
|
||||||
String.metaClass.getFiles = { c -> new File(delegate).getFiles(c) }
|
String.metaClass.getFiles = { c -> new File(delegate).getFiles(c) }
|
||||||
File.metaClass.getFiles = { c -> def files = []; traverse(type:FILES) { files += it }; return c ? files.findAll(c) : files }
|
File.metaClass.getFiles = { c -> def files = []; traverse(type:FILES) { files += it }; return c ? files.findAll(c).sort() : files.sort() }
|
||||||
List.metaClass.getFiles = { c -> findResults{ it.getFiles(c) }.flatten().unique() }
|
List.metaClass.getFiles = { c -> findResults{ it.getFiles(c) }.flatten().unique() }
|
||||||
|
|
||||||
String.metaClass.getFolders = { c -> new File(delegate).getFolders(c) }
|
String.metaClass.getFolders = { c -> new File(delegate).getFolders(c) }
|
||||||
File.metaClass.getFolders = { c -> def folders = []; traverse(type:DIRECTORIES, visitRoot:true) { folders += it }; return c ? folders.findAll(c) : folders }
|
File.metaClass.getFolders = { c -> def folders = []; traverse(type:DIRECTORIES, visitRoot:true) { folders += it }; return c ? folders.findAll(c).sort() : folders.sort() }
|
||||||
List.metaClass.getFolders = { c -> findResults{ it.getFolders(c) }.flatten().unique() }
|
List.metaClass.getFolders = { c -> findResults{ it.getFolders(c) }.flatten().unique() }
|
||||||
|
|
||||||
String.metaClass.eachMediaFolder = { c -> new File(delegate).eachMediaFolder(c) }
|
String.metaClass.eachMediaFolder = { c -> new File(delegate).eachMediaFolder(c) }
|
||||||
|
@ -135,9 +135,9 @@ List.metaClass.watch = { c -> createWatchService(c, delegate, true) }
|
||||||
import net.sourceforge.filebot.media.*
|
import net.sourceforge.filebot.media.*
|
||||||
import net.sourceforge.filebot.similarity.*
|
import net.sourceforge.filebot.similarity.*
|
||||||
|
|
||||||
def parseEpisodeNumber(path) {
|
def parseEpisodeNumber(path, strict = true) {
|
||||||
def input = path instanceof File ? path.name : path.toString()
|
def input = path instanceof File ? path.name : path.toString()
|
||||||
def sxe = new SeasonEpisodeMatcher(new SeasonEpisodeMatcher.SeasonEpisodeFilter(30, 50, 1000)).match(input)
|
def sxe = new SeasonEpisodeMatcher(new SeasonEpisodeMatcher.SeasonEpisodeFilter(30, 50, 1000), strict).match(input)
|
||||||
return sxe == null || sxe.isEmpty() ? null : sxe[0]
|
return sxe == null || sxe.isEmpty() ? null : sxe[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,10 +14,10 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class SeasonEpisodeMatcher {
|
public class SeasonEpisodeMatcher {
|
||||||
|
|
||||||
private final SeasonEpisodePattern[] patterns;
|
private SeasonEpisodePattern[] patterns;
|
||||||
|
|
||||||
|
|
||||||
public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity) {
|
public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity, boolean strict) {
|
||||||
patterns = new SeasonEpisodePattern[3];
|
patterns = new SeasonEpisodePattern[3];
|
||||||
|
|
||||||
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ...
|
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ...
|
||||||
|
@ -38,12 +38,17 @@ public class SeasonEpisodeMatcher {
|
||||||
SxE absoluteEpisode = new SxE(null, match.group(1) + match.group(2));
|
SxE absoluteEpisode = new SxE(null, match.group(1) + match.group(2));
|
||||||
|
|
||||||
// return both matches, unless they are one and the same
|
// return both matches, unless they are one and the same
|
||||||
return seasonEpisode.equals(absoluteEpisode) ? Collections.singleton(absoluteEpisode) : Arrays.asList(seasonEpisode, absoluteEpisode);
|
return seasonEpisode.equals(absoluteEpisode) ? Collections.singleton(seasonEpisode) : Arrays.asList(seasonEpisode, absoluteEpisode);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// only use S00E00 and SxE pattern in strict mode
|
||||||
|
if (strict) {
|
||||||
|
patterns = new SeasonEpisodePattern[] { patterns[0], patterns[1] };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to get season and episode numbers for the given string.
|
* Try to get season and episode numbers for the given string.
|
||||||
*
|
*
|
||||||
|
@ -64,7 +69,7 @@ public class SeasonEpisodeMatcher {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public int find(CharSequence name, int fromIndex) {
|
public int find(CharSequence name, int fromIndex) {
|
||||||
for (SeasonEpisodePattern pattern : patterns) {
|
for (SeasonEpisodePattern pattern : patterns) {
|
||||||
int index = pattern.find(name, fromIndex);
|
int index = pattern.find(name, fromIndex);
|
||||||
|
@ -78,7 +83,7 @@ public class SeasonEpisodeMatcher {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Matcher matcher(CharSequence name) {
|
public Matcher matcher(CharSequence name) {
|
||||||
for (SeasonEpisodePattern pattern : patterns) {
|
for (SeasonEpisodePattern pattern : patterns) {
|
||||||
Matcher matcher = pattern.matcher(name);
|
Matcher matcher = pattern.matcher(name);
|
||||||
|
@ -93,7 +98,7 @@ public class SeasonEpisodeMatcher {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static class SxE {
|
public static class SxE {
|
||||||
|
|
||||||
public static final int UNDEFINED = -1;
|
public static final int UNDEFINED = -1;
|
||||||
|
@ -101,19 +106,19 @@ public class SeasonEpisodeMatcher {
|
||||||
public final int season;
|
public final int season;
|
||||||
public final int episode;
|
public final int episode;
|
||||||
|
|
||||||
|
|
||||||
public SxE(Integer season, Integer episode) {
|
public SxE(Integer season, Integer episode) {
|
||||||
this.season = season != null ? season : UNDEFINED;
|
this.season = season != null ? season : UNDEFINED;
|
||||||
this.episode = episode != null ? episode : UNDEFINED;
|
this.episode = episode != null ? episode : UNDEFINED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public SxE(String season, String episode) {
|
public SxE(String season, String episode) {
|
||||||
this.season = parse(season);
|
this.season = parse(season);
|
||||||
this.episode = parse(episode);
|
this.episode = parse(episode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected int parse(String number) {
|
protected int parse(String number) {
|
||||||
try {
|
try {
|
||||||
return Integer.parseInt(number);
|
return Integer.parseInt(number);
|
||||||
|
@ -122,7 +127,7 @@ public class SeasonEpisodeMatcher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object object) {
|
public boolean equals(Object object) {
|
||||||
if (object instanceof SxE) {
|
if (object instanceof SxE) {
|
||||||
|
@ -133,62 +138,62 @@ public class SeasonEpisodeMatcher {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Arrays.hashCode(new Object[] { season, episode });
|
return Arrays.hashCode(new Object[] { season, episode });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return season >= 0 ? String.format("%dx%02d", season, episode) : String.format("%02d", episode);
|
return season >= 0 ? String.format("%dx%02d", season, episode) : String.format("%02d", episode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static class SeasonEpisodeFilter {
|
public static class SeasonEpisodeFilter {
|
||||||
|
|
||||||
public final int seasonLimit;
|
public final int seasonLimit;
|
||||||
public final int seasonEpisodeLimit;
|
public final int seasonEpisodeLimit;
|
||||||
public final int absoluteEpisodeLimit;
|
public final int absoluteEpisodeLimit;
|
||||||
|
|
||||||
|
|
||||||
public SeasonEpisodeFilter(int seasonLimit, int seasonEpisodeLimit, int absoluteEpisodeLimit) {
|
public SeasonEpisodeFilter(int seasonLimit, int seasonEpisodeLimit, int absoluteEpisodeLimit) {
|
||||||
this.seasonLimit = seasonLimit;
|
this.seasonLimit = seasonLimit;
|
||||||
this.seasonEpisodeLimit = seasonEpisodeLimit;
|
this.seasonEpisodeLimit = seasonEpisodeLimit;
|
||||||
this.absoluteEpisodeLimit = absoluteEpisodeLimit;
|
this.absoluteEpisodeLimit = absoluteEpisodeLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
boolean filter(SxE sxe) {
|
boolean filter(SxE sxe) {
|
||||||
return (sxe.season >= 0 && sxe.season < seasonLimit && sxe.episode < seasonEpisodeLimit) || (sxe.season < 0 && sxe.episode < absoluteEpisodeLimit);
|
return (sxe.season >= 0 && sxe.season < seasonLimit && sxe.episode < seasonEpisodeLimit) || (sxe.season < 0 && sxe.episode < absoluteEpisodeLimit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static class SeasonEpisodePattern {
|
public static class SeasonEpisodePattern {
|
||||||
|
|
||||||
protected final Pattern pattern;
|
protected final Pattern pattern;
|
||||||
protected final SeasonEpisodeFilter sanity;
|
protected final SeasonEpisodeFilter sanity;
|
||||||
|
|
||||||
|
|
||||||
public SeasonEpisodePattern(SeasonEpisodeFilter sanity, String pattern) {
|
public SeasonEpisodePattern(SeasonEpisodeFilter sanity, String pattern) {
|
||||||
this.pattern = Pattern.compile(pattern);
|
this.pattern = Pattern.compile(pattern);
|
||||||
this.sanity = sanity;
|
this.sanity = sanity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Matcher matcher(CharSequence name) {
|
public Matcher matcher(CharSequence name) {
|
||||||
return pattern.matcher(name);
|
return pattern.matcher(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected Collection<SxE> process(MatchResult match) {
|
protected Collection<SxE> process(MatchResult match) {
|
||||||
return Collections.singleton(new SxE(match.group(1), match.group(2)));
|
return Collections.singleton(new SxE(match.group(1), match.group(2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<SxE> match(CharSequence name) {
|
public List<SxE> match(CharSequence name) {
|
||||||
// name will probably contain no more than two matches
|
// name will probably contain no more than two matches
|
||||||
List<SxE> matches = new ArrayList<SxE>(2);
|
List<SxE> matches = new ArrayList<SxE>(2);
|
||||||
|
@ -206,7 +211,7 @@ public class SeasonEpisodeMatcher {
|
||||||
return matches;
|
return matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public int find(CharSequence name, int fromIndex) {
|
public int find(CharSequence name, int fromIndex) {
|
||||||
Matcher matcher = matcher(name).region(fromIndex, name.length());
|
Matcher matcher = matcher(name).region(fromIndex, name.length());
|
||||||
|
|
||||||
|
|
|
@ -10,9 +10,9 @@ import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||||
|
|
||||||
public class SeasonEpisodeMetric implements SimilarityMetric {
|
public class SeasonEpisodeMetric implements SimilarityMetric {
|
||||||
|
|
||||||
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(null);
|
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(null, false);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
Collection<SxE> sxeVector1 = parse(o1);
|
Collection<SxE> sxeVector1 = parse(o1);
|
||||||
|
@ -41,7 +41,7 @@ public class SeasonEpisodeMetric implements SimilarityMetric {
|
||||||
return similarity;
|
return similarity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected Collection<SxE> parse(Object object) {
|
protected Collection<SxE> parse(Object object) {
|
||||||
if (object instanceof File) {
|
if (object instanceof File) {
|
||||||
// parse file name
|
// parse file name
|
||||||
|
|
|
@ -28,7 +28,7 @@ import net.sourceforge.tuned.FileUtilities;
|
||||||
|
|
||||||
public class SeriesNameMatcher {
|
public class SeriesNameMatcher {
|
||||||
|
|
||||||
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, 1000));
|
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
|
||||||
protected final NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
protected final NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
||||||
|
|
||||||
protected final int commonWordSequenceMaxStartIndex = 3;
|
protected final int commonWordSequenceMaxStartIndex = 3;
|
||||||
|
@ -63,7 +63,16 @@ public class SeriesNameMatcher {
|
||||||
|
|
||||||
// match common word sequences (likely series names)
|
// match common word sequences (likely series names)
|
||||||
SeriesNameCollection whitelist = new SeriesNameCollection();
|
SeriesNameCollection whitelist = new SeriesNameCollection();
|
||||||
whitelist.addAll(deepMatchAll(names, threshold));
|
|
||||||
|
// focus chars before the SxE pattern when matching by common word sequence
|
||||||
|
String[] focus = Arrays.copyOf(names, names.length);
|
||||||
|
for (int i = 0; i < focus.length; i++) {
|
||||||
|
int pos = seasonEpisodeMatcher.find(focus[i], 0);
|
||||||
|
if (pos >= 0) {
|
||||||
|
focus[i] = focus[i].substring(0, pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
whitelist.addAll(deepMatchAll(focus, threshold));
|
||||||
|
|
||||||
// 1. use pattern matching
|
// 1. use pattern matching
|
||||||
seriesNames.addAll(flatMatchAll(names, Pattern.compile(join(whitelist, "|"), Pattern.CASE_INSENSITIVE), threshold, false));
|
seriesNames.addAll(flatMatchAll(names, Pattern.compile(join(whitelist, "|"), Pattern.CASE_INSENSITIVE), threshold, false));
|
||||||
|
|
|
@ -13,9 +13,9 @@ import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||||
|
|
||||||
public class SeasonEpisodeMatcherTest {
|
public class SeasonEpisodeMatcherTest {
|
||||||
|
|
||||||
private static SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(null);
|
private static SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(null, false);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void patternPrecedence() {
|
public void patternPrecedence() {
|
||||||
// S01E01 pattern has highest precedence
|
// S01E01 pattern has highest precedence
|
||||||
|
@ -25,7 +25,7 @@ public class SeasonEpisodeMatcherTest {
|
||||||
assertEquals(new SxE(1, 2), matcher.match("Test.42.s01e01.s01e02.300").get(1));
|
assertEquals(new SxE(1, 2), matcher.match("Test.42.s01e01.s01e02.300").get(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pattern_1x01() {
|
public void pattern_1x01() {
|
||||||
assertEquals(new SxE(1, 1), matcher.match("1x01").get(0));
|
assertEquals(new SxE(1, 1), matcher.match("1x01").get(0));
|
||||||
|
@ -40,7 +40,7 @@ public class SeasonEpisodeMatcherTest {
|
||||||
assertEquals(new SxE(1, 3), matcher.match("Test_-_103_[1280x720]").get(0));
|
assertEquals(new SxE(1, 3), matcher.match("Test_-_103_[1280x720]").get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pattern_S01E01() {
|
public void pattern_S01E01() {
|
||||||
assertEquals(new SxE(1, 1), matcher.match("S01E01").get(0));
|
assertEquals(new SxE(1, 1), matcher.match("S01E01").get(0));
|
||||||
|
@ -56,7 +56,7 @@ public class SeasonEpisodeMatcherTest {
|
||||||
assertEquals(new SxE(12, 345), matcher.match("Test - S12E345 - High Values").get(0));
|
assertEquals(new SxE(12, 345), matcher.match("Test - S12E345 - High Values").get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pattern_101() {
|
public void pattern_101() {
|
||||||
assertEquals(new SxE(1, 1), matcher.match("Test.101").get(0));
|
assertEquals(new SxE(1, 1), matcher.match("Test.101").get(0));
|
||||||
|
|
Loading…
Reference in New Issue