* improve series name detection by only focusing on the common word sequence before the SxE pattern

This commit is contained in:
Reinhard Pointner 2011-12-29 00:41:27 +00:00
parent 5530bc47f5
commit b2e092c697
5 changed files with 51 additions and 37 deletions

View File

@ -15,11 +15,11 @@ File.metaClass.getDir = { getParentFile() }
File.metaClass.hasFile = { c -> isDirectory() && listFiles().find(c) }
String.metaClass.getFiles = { c -> new File(delegate).getFiles(c) }
File.metaClass.getFiles = { c -> def files = []; traverse(type:FILES) { files += it }; return c ? files.findAll(c) : files }
File.metaClass.getFiles = { c -> def files = []; traverse(type:FILES) { files += it }; return c ? files.findAll(c).sort() : files.sort() }
List.metaClass.getFiles = { c -> findResults{ it.getFiles(c) }.flatten().unique() }
String.metaClass.getFolders = { c -> new File(delegate).getFolders(c) }
File.metaClass.getFolders = { c -> def folders = []; traverse(type:DIRECTORIES, visitRoot:true) { folders += it }; return c ? folders.findAll(c) : folders }
File.metaClass.getFolders = { c -> def folders = []; traverse(type:DIRECTORIES, visitRoot:true) { folders += it }; return c ? folders.findAll(c).sort() : folders.sort() }
List.metaClass.getFolders = { c -> findResults{ it.getFolders(c) }.flatten().unique() }
String.metaClass.eachMediaFolder = { c -> new File(delegate).eachMediaFolder(c) }
@ -135,9 +135,9 @@ List.metaClass.watch = { c -> createWatchService(c, delegate, true) }
import net.sourceforge.filebot.media.*
import net.sourceforge.filebot.similarity.*
def parseEpisodeNumber(path) {
def parseEpisodeNumber(path, strict = true) {
def input = path instanceof File ? path.name : path.toString()
def sxe = new SeasonEpisodeMatcher(new SeasonEpisodeMatcher.SeasonEpisodeFilter(30, 50, 1000)).match(input)
def sxe = new SeasonEpisodeMatcher(new SeasonEpisodeMatcher.SeasonEpisodeFilter(30, 50, 1000), strict).match(input)
return sxe == null || sxe.isEmpty() ? null : sxe[0]
}

View File

@ -14,10 +14,10 @@ import java.util.regex.Pattern;
public class SeasonEpisodeMatcher {
private final SeasonEpisodePattern[] patterns;
private SeasonEpisodePattern[] patterns;
public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity) {
public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity, boolean strict) {
patterns = new SeasonEpisodePattern[3];
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ...
@ -38,9 +38,14 @@ public class SeasonEpisodeMatcher {
SxE absoluteEpisode = new SxE(null, match.group(1) + match.group(2));
// return both matches, unless they are one and the same
return seasonEpisode.equals(absoluteEpisode) ? Collections.singleton(absoluteEpisode) : Arrays.asList(seasonEpisode, absoluteEpisode);
return seasonEpisode.equals(absoluteEpisode) ? Collections.singleton(seasonEpisode) : Arrays.asList(seasonEpisode, absoluteEpisode);
}
};
// only use S00E00 and SxE pattern in strict mode
if (strict) {
patterns = new SeasonEpisodePattern[] { patterns[0], patterns[1] };
}
}

View File

@ -10,7 +10,7 @@ import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
public class SeasonEpisodeMetric implements SimilarityMetric {
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(null);
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(null, false);
@Override

View File

@ -28,7 +28,7 @@ import net.sourceforge.tuned.FileUtilities;
public class SeriesNameMatcher {
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, 1000));
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
protected final NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
protected final int commonWordSequenceMaxStartIndex = 3;
@ -63,7 +63,16 @@ public class SeriesNameMatcher {
// match common word sequences (likely series names)
SeriesNameCollection whitelist = new SeriesNameCollection();
whitelist.addAll(deepMatchAll(names, threshold));
// focus chars before the SxE pattern when matching by common word sequence
String[] focus = Arrays.copyOf(names, names.length);
for (int i = 0; i < focus.length; i++) {
int pos = seasonEpisodeMatcher.find(focus[i], 0);
if (pos >= 0) {
focus[i] = focus[i].substring(0, pos);
}
}
whitelist.addAll(deepMatchAll(focus, threshold));
// 1. use pattern matching
seriesNames.addAll(flatMatchAll(names, Pattern.compile(join(whitelist, "|"), Pattern.CASE_INSENSITIVE), threshold, false));

View File

@ -13,7 +13,7 @@ import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
public class SeasonEpisodeMatcherTest {
private static SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(null);
private static SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(null, false);
@Test