From b2e092c697ceb86d0c46351131f05a465b0ad1e1 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 29 Dec 2011 00:41:27 +0000 Subject: [PATCH] * improve series name detection by only focusing on the common word sequence before the SxE pattern --- .../filebot/cli/ScriptShell.lib.groovy | 8 +-- .../similarity/SeasonEpisodeMatcher.java | 51 ++++++++++--------- .../similarity/SeasonEpisodeMetric.java | 6 +-- .../filebot/similarity/SeriesNameMatcher.java | 13 ++++- .../similarity/SeasonEpisodeMatcherTest.java | 10 ++-- 5 files changed, 51 insertions(+), 37 deletions(-) diff --git a/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy b/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy index c9666ed3..ed96b425 100644 --- a/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy +++ b/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy @@ -15,11 +15,11 @@ File.metaClass.getDir = { getParentFile() } File.metaClass.hasFile = { c -> isDirectory() && listFiles().find(c) } String.metaClass.getFiles = { c -> new File(delegate).getFiles(c) } -File.metaClass.getFiles = { c -> def files = []; traverse(type:FILES) { files += it }; return c ? files.findAll(c) : files } +File.metaClass.getFiles = { c -> def files = []; traverse(type:FILES) { files += it }; return c ? files.findAll(c).sort() : files.sort() } List.metaClass.getFiles = { c -> findResults{ it.getFiles(c) }.flatten().unique() } String.metaClass.getFolders = { c -> new File(delegate).getFolders(c) } -File.metaClass.getFolders = { c -> def folders = []; traverse(type:DIRECTORIES, visitRoot:true) { folders += it }; return c ? 
folders.findAll(c).sort() : folders.sort() } List.metaClass.getFolders = { c -> findResults{ it.getFolders(c) }.flatten().unique() } String.metaClass.eachMediaFolder = { c -> new File(delegate).eachMediaFolder(c) } @@ -135,9 +135,9 @@ List.metaClass.watch = { c -> createWatchService(c, delegate, true) } import net.sourceforge.filebot.media.* import net.sourceforge.filebot.similarity.* -def parseEpisodeNumber(path) { +def parseEpisodeNumber(path, strict = true) { def input = path instanceof File ? path.name : path.toString() - def sxe = new SeasonEpisodeMatcher(new SeasonEpisodeMatcher.SeasonEpisodeFilter(30, 50, 1000)).match(input) + def sxe = new SeasonEpisodeMatcher(new SeasonEpisodeMatcher.SeasonEpisodeFilter(30, 50, 1000), strict).match(input) return sxe == null || sxe.isEmpty() ? null : sxe[0] } diff --git a/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java b/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java index ca0a268f..a443a2ab 100644 --- a/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java +++ b/source/net/sourceforge/filebot/similarity/SeasonEpisodeMatcher.java @@ -14,10 +14,10 @@ import java.util.regex.Pattern; public class SeasonEpisodeMatcher { - private final SeasonEpisodePattern[] patterns; + private SeasonEpisodePattern[] patterns; - - public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity) { + + public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity, boolean strict) { patterns = new SeasonEpisodePattern[3]; // match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... @@ -38,12 +38,17 @@ public class SeasonEpisodeMatcher { SxE absoluteEpisode = new SxE(null, match.group(1) + match.group(2)); // return both matches, unless they are one and the same - return seasonEpisode.equals(absoluteEpisode) ? Collections.singleton(absoluteEpisode) : Arrays.asList(seasonEpisode, absoluteEpisode); + return seasonEpisode.equals(absoluteEpisode) ? 
Collections.singleton(seasonEpisode) : Arrays.asList(seasonEpisode, absoluteEpisode); } }; + + // only use S00E00 and SxE pattern in strict mode + if (strict) { + patterns = new SeasonEpisodePattern[] { patterns[0], patterns[1] }; + } } - + /** * Try to get season and episode numbers for the given string. * @@ -64,7 +69,7 @@ public class SeasonEpisodeMatcher { return null; } - + public int find(CharSequence name, int fromIndex) { for (SeasonEpisodePattern pattern : patterns) { int index = pattern.find(name, fromIndex); @@ -78,7 +83,7 @@ public class SeasonEpisodeMatcher { return -1; } - + public Matcher matcher(CharSequence name) { for (SeasonEpisodePattern pattern : patterns) { Matcher matcher = pattern.matcher(name); @@ -93,7 +98,7 @@ public class SeasonEpisodeMatcher { return null; } - + public static class SxE { public static final int UNDEFINED = -1; @@ -101,19 +106,19 @@ public class SeasonEpisodeMatcher { public final int season; public final int episode; - + public SxE(Integer season, Integer episode) { this.season = season != null ? season : UNDEFINED; this.episode = episode != null ? episode : UNDEFINED; } - + public SxE(String season, String episode) { this.season = parse(season); this.episode = parse(episode); } - + protected int parse(String number) { try { return Integer.parseInt(number); @@ -122,7 +127,7 @@ public class SeasonEpisodeMatcher { } } - + @Override public boolean equals(Object object) { if (object instanceof SxE) { @@ -133,62 +138,62 @@ public class SeasonEpisodeMatcher { return false; } - + @Override public int hashCode() { return Arrays.hashCode(new Object[] { season, episode }); } - + @Override public String toString() { return season >= 0 ? 
String.format("%dx%02d", season, episode) : String.format("%02d", episode); } } - + public static class SeasonEpisodeFilter { public final int seasonLimit; public final int seasonEpisodeLimit; public final int absoluteEpisodeLimit; - + public SeasonEpisodeFilter(int seasonLimit, int seasonEpisodeLimit, int absoluteEpisodeLimit) { this.seasonLimit = seasonLimit; this.seasonEpisodeLimit = seasonEpisodeLimit; this.absoluteEpisodeLimit = absoluteEpisodeLimit; } - + boolean filter(SxE sxe) { return (sxe.season >= 0 && sxe.season < seasonLimit && sxe.episode < seasonEpisodeLimit) || (sxe.season < 0 && sxe.episode < absoluteEpisodeLimit); } } - + public static class SeasonEpisodePattern { protected final Pattern pattern; protected final SeasonEpisodeFilter sanity; - + public SeasonEpisodePattern(SeasonEpisodeFilter sanity, String pattern) { this.pattern = Pattern.compile(pattern); this.sanity = sanity; } - + public Matcher matcher(CharSequence name) { return pattern.matcher(name); } - + protected Collection process(MatchResult match) { return Collections.singleton(new SxE(match.group(1), match.group(2))); } - + public List match(CharSequence name) { // name will probably contain no more than two matches List matches = new ArrayList(2); @@ -206,7 +211,7 @@ public class SeasonEpisodeMatcher { return matches; } - + public int find(CharSequence name, int fromIndex) { Matcher matcher = matcher(name).region(fromIndex, name.length()); diff --git a/source/net/sourceforge/filebot/similarity/SeasonEpisodeMetric.java b/source/net/sourceforge/filebot/similarity/SeasonEpisodeMetric.java index 4903ccad..12f96f6a 100644 --- a/source/net/sourceforge/filebot/similarity/SeasonEpisodeMetric.java +++ b/source/net/sourceforge/filebot/similarity/SeasonEpisodeMetric.java @@ -10,9 +10,9 @@ import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE; public class SeasonEpisodeMetric implements SimilarityMetric { - private final SeasonEpisodeMatcher seasonEpisodeMatcher = new 
SeasonEpisodeMatcher(null); + private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(null, false); + - @Override public float getSimilarity(Object o1, Object o2) { Collection sxeVector1 = parse(o1); @@ -41,7 +41,7 @@ public class SeasonEpisodeMetric implements SimilarityMetric { return similarity; } - + protected Collection parse(Object object) { if (object instanceof File) { // parse file name diff --git a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java index 422ac354..74201afc 100644 --- a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java +++ b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java @@ -28,7 +28,7 @@ import net.sourceforge.tuned.FileUtilities; public class SeriesNameMatcher { - protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, 1000)); + protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true); protected final NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric(); protected final int commonWordSequenceMaxStartIndex = 3; @@ -63,7 +63,16 @@ public class SeriesNameMatcher { // match common word sequences (likely series names) SeriesNameCollection whitelist = new SeriesNameCollection(); - whitelist.addAll(deepMatchAll(names, threshold)); + + // focus chars before the SxE pattern when matching by common word sequence + String[] focus = Arrays.copyOf(names, names.length); + for (int i = 0; i < focus.length; i++) { + int pos = seasonEpisodeMatcher.find(focus[i], 0); + if (pos >= 0) { + focus[i] = focus[i].substring(0, pos); + } + } + whitelist.addAll(deepMatchAll(focus, threshold)); // 1. 
use pattern matching seriesNames.addAll(flatMatchAll(names, Pattern.compile(join(whitelist, "|"), Pattern.CASE_INSENSITIVE), threshold, false)); diff --git a/test/net/sourceforge/filebot/similarity/SeasonEpisodeMatcherTest.java b/test/net/sourceforge/filebot/similarity/SeasonEpisodeMatcherTest.java index 3fa250b6..a553c660 100644 --- a/test/net/sourceforge/filebot/similarity/SeasonEpisodeMatcherTest.java +++ b/test/net/sourceforge/filebot/similarity/SeasonEpisodeMatcherTest.java @@ -13,9 +13,9 @@ import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE; public class SeasonEpisodeMatcherTest { - private static SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(null); + private static SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(null, false); + - @Test public void patternPrecedence() { // S01E01 pattern has highest precedence @@ -25,7 +25,7 @@ public class SeasonEpisodeMatcherTest { assertEquals(new SxE(1, 2), matcher.match("Test.42.s01e01.s01e02.300").get(1)); } - + @Test public void pattern_1x01() { assertEquals(new SxE(1, 1), matcher.match("1x01").get(0)); @@ -40,7 +40,7 @@ public class SeasonEpisodeMatcherTest { assertEquals(new SxE(1, 3), matcher.match("Test_-_103_[1280x720]").get(0)); } - + @Test public void pattern_S01E01() { assertEquals(new SxE(1, 1), matcher.match("S01E01").get(0)); @@ -56,7 +56,7 @@ public class SeasonEpisodeMatcherTest { assertEquals(new SxE(12, 345), matcher.match("Test - S12E345 - High Values").get(0)); } - + @Test public void pattern_101() { assertEquals(new SxE(1, 1), matcher.match("Test.101").get(0));