* improved auto-detection for date-based episodes

This commit is contained in:
Reinhard Pointner 2012-02-09 13:50:14 +00:00
parent 517fa36038
commit b2fbba3a2d
7 changed files with 137 additions and 66 deletions

View File

@ -106,7 +106,7 @@ public class CmdlineOperations implements CmdlineInterface {
for (File f : mediaFiles) {
// count SxE matches
if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) {
if (nameMatcher.matchByEpisodeIdentifier(f.getName()) != null) {
sxe++;
}

View File

@ -0,0 +1,98 @@
package net.sourceforge.filebot.similarity;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.filebot.web.Date;
/**
 * Locates calendar dates embedded in free-form text such as file names.
 *
 * <p>
 * A matcher holds an ordered list of {@link DatePattern}s. Both {@link #match(CharSequence)} and
 * {@link #find(CharSequence, int)} try the patterns in that order and stop at the first pattern
 * that matches, so an earlier pattern takes precedence over a later one even if the later one
 * would match at a smaller offset.
 */
public class DateMatcher {

	private final DatePattern[] patterns;


	/**
	 * Creates a matcher with the two built-in numeric patterns: yyyy-mm-dd first, then dd-mm-yyyy.
	 */
	public DateMatcher() {
		this(
				// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc.
				new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 }),
				// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc.
				new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 }));
	}


	/**
	 * Creates a matcher that uses the given patterns, tried in the given order.
	 *
	 * @param patterns the patterns to use; the array is copied, so later changes made by the
	 *            caller do not affect this matcher
	 */
	public DateMatcher(DatePattern... patterns) {
		this.patterns = patterns.clone();
	}


	/**
	 * Extracts a date from the given text.
	 *
	 * @param seq the text to search
	 * @return the date produced by the first pattern that matches, or {@code null} if no pattern matches
	 */
	public Date match(CharSequence seq) {
		for (DatePattern pattern : patterns) {
			Date match = pattern.match(seq);

			if (match != null) {
				return match;
			}
		}

		return null;
	}


	/**
	 * Finds the start offset of a date in the given text.
	 *
	 * @param seq the text to search
	 * @param fromIndex the offset at which the search starts
	 * @return the start offset reported by the first pattern that matches, or {@code -1} if no
	 *         pattern matches
	 * @throws IndexOutOfBoundsException if {@code fromIndex} is negative or greater than {@code seq.length()}
	 */
	public int find(CharSequence seq, int fromIndex) {
		for (DatePattern pattern : patterns) {
			int pos = pattern.find(seq, fromIndex);

			if (pos >= 0) {
				return pos;
			}
		}

		return -1;
	}


	/**
	 * A regular expression with three numeric capture groups, plus the order in which those
	 * groups are passed to the {@code Date} constructor.
	 *
	 * <p>
	 * This type is public because the public {@link DateMatcher#DateMatcher(DatePattern...)}
	 * constructor accepts it; a private type would make that constructor unusable from outside.
	 */
	public static class DatePattern {

		protected final Pattern pattern;
		protected final int[] order;


		/**
		 * @param pattern the regular expression; it must define every capture group referenced by {@code order}
		 * @param order the capture group indices in the order the {@code Date} constructor expects its
		 *            arguments (year, month, day for the built-in patterns); the array is copied
		 */
		public DatePattern(String pattern, int[] order) {
			this.pattern = Pattern.compile(pattern);
			this.order = order.clone();
		}


		/**
		 * Converts a successful match into a date by parsing the three capture groups named by {@link #order}.
		 *
		 * @throws NumberFormatException if a referenced group is not a valid {@code int}
		 */
		protected Date process(MatchResult match) {
			int first = Integer.parseInt(match.group(order[0]));
			int second = Integer.parseInt(match.group(order[1]));
			int third = Integer.parseInt(match.group(order[2]));

			return new Date(first, second, third);
		}


		/**
		 * @return the date for the first occurrence of this pattern in {@code seq}, or {@code null} if there is none
		 */
		public Date match(CharSequence seq) {
			Matcher matcher = pattern.matcher(seq);

			if (matcher.find()) {
				return process(matcher);
			}

			return null;
		}


		/**
		 * @return the start offset of the first occurrence of this pattern at or after
		 *         {@code fromIndex}, or {@code -1} if there is none
		 * @throws IndexOutOfBoundsException if {@code fromIndex} is negative or greater than {@code seq.length()}
		 */
		public int find(CharSequence seq, int fromIndex) {
			// NOTE(review): region() uses opaque bounds by default, so a look-behind in the pattern
			// cannot see characters before fromIndex; confirm that callers rely on this before
			// switching to useTransparentBounds(true)
			Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length());

			if (matcher.find()) {
				return matcher.start();
			}

			return -1;
		}
	}

}

View File

@ -3,26 +3,22 @@ package net.sourceforge.filebot.similarity;
import java.io.File;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.filebot.web.Date;
public class DateMetric implements SimilarityMetric {
private final DatePattern[] patterns;
private final DateMatcher matcher;
public DateMetric() {
patterns = new DatePattern[2];
// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc.
patterns[0] = new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 });
// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc.
patterns[1] = new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 });
this.matcher = new DateMatcher();
}
public DateMetric(DateMatcher matcher) {
this.matcher = matcher;
}
@ -46,49 +42,7 @@ public class DateMetric implements SimilarityMetric {
object = ((File) object).getName();
}
return match(object.toString());
}
public Date match(CharSequence name) {
for (DatePattern pattern : patterns) {
Date match = pattern.match(name);
if (match != null) {
return match;
}
}
return null;
}
protected static class DatePattern {
protected final Pattern pattern;
protected final int[] order;
public DatePattern(String pattern, int[] order) {
this.pattern = Pattern.compile(pattern);
this.order = order;
}
protected Date process(MatchResult match) {
return new Date(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2])));
}
public Date match(CharSequence name) {
Matcher matcher = pattern.matcher(name);
if (matcher.find()) {
return process(matcher);
}
return null;
}
return matcher.match(object.toString());
}
}

View File

@ -24,7 +24,7 @@ public class SeasonEpisodeMatcher {
patterns[0] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](\\d{1,3})(?!\\p{Digit})");
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ...
patterns[1] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})(\\d{1,2})[x.](\\d{2,3})(?!\\p{Digit})");
patterns[1] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[x.](\\d{2,3})(?!\\p{Digit})");
// match patterns like 01, 102, 1003 (enclosed in separators)
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})([0-1]?\\d?)(\\d{2})(?!\\p{Alnum})") {

View File

@ -31,6 +31,7 @@ import net.sourceforge.tuned.FileUtilities;
public class SeriesNameMatcher {
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
protected DateMatcher dateMatcher = new DateMatcher();
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
protected int commonWordSequenceMaxStartIndex;
@ -83,12 +84,17 @@ public class SeriesNameMatcher {
// match common word sequences (likely series names)
SeriesNameCollection whitelist = new SeriesNameCollection();
// focus chars before the SxE pattern when matching by common word sequence
// focus chars before the SxE / Date pattern when matching by common word sequence
String[] focus = Arrays.copyOf(names, names.length);
for (int i = 0; i < focus.length; i++) {
int pos = seasonEpisodeMatcher.find(focus[i], 0);
if (pos >= 0) {
focus[i] = focus[i].substring(0, pos);
int sxePos = seasonEpisodeMatcher.find(focus[i], 0);
if (sxePos >= 0) {
focus[i] = focus[i].substring(0, sxePos);
} else {
int datePos = dateMatcher.find(focus[i], 0);
if (datePos >= 0) {
focus[i] = focus[i].substring(0, datePos);
}
}
}
whitelist.addAll(deepMatchAll(focus, threshold));
@ -118,8 +124,9 @@ public class SeriesNameMatcher {
name = normalize(name);
Matcher prefix = prefixPattern.matcher(name);
int sxePosition = seasonEpisodeMatcher.find(name, prefix.find() ? prefix.end() : 0);
int prefixEnd = prefix.find() ? prefix.end() : 0;
int sxePosition = seasonEpisodeMatcher.find(name, prefixEnd);
if (sxePosition > 0) {
String hit = name.substring(0, sxePosition).trim();
List<SxE> sxe = seasonEpisodeMatcher.match(name.substring(sxePosition));
@ -131,7 +138,14 @@ public class SeriesNameMatcher {
// require multiple matches, if hit might be a false match
thresholdCollection.add(hit);
}
} else {
// try date pattern as fallback
int datePosition = dateMatcher.find(name, prefixEnd);
if (datePosition > 0) {
thresholdCollection.addDirect(name.substring(0, datePosition).trim());
}
}
}
return thresholdCollection;
@ -176,14 +190,19 @@ public class SeriesNameMatcher {
* @return a substring of the given name that ends before the first occurrence of a season
* episode pattern (or, failing that, a date pattern), or null if there is no such pattern
*/
public String matchBySeasonEpisodePattern(String name) {
public String matchByEpisodeIdentifier(String name) {
	// the series name ends where the first season / episode pattern begins
	int identifierPosition = seasonEpisodeMatcher.find(name, 0);

	if (identifierPosition <= 0) {
		// no usable season / episode pattern, so the series name ends at the first date pattern instead
		identifierPosition = dateMatcher.find(name, 0);
	}

	if (identifierPosition <= 0) {
		// neither kind of identifier was found after at least one leading character
		return null;
	}

	return normalize(name.substring(0, identifierPosition));
}

View File

@ -233,7 +233,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
// require user input if auto-detection has failed or has been disabled
if (episodes.isEmpty()) {
String suggestion = new SeriesNameMatcher().matchBySeasonEpisodePattern(getName(files.get(0)));
String suggestion = new SeriesNameMatcher().matchByEpisodeIdentifier(getName(files.get(0)));
if (suggestion != null) {
// clean media info / release group info / etc
suggestion = stripReleaseInfo(suggestion);

View File

@ -34,10 +34,10 @@ public class SeriesNameMatcherTest {
@Test
public void matchBeforeSeasonEpisodePattern() {
assertEquals("The Test", matcher.matchBySeasonEpisodePattern("The Test - 1x01"));
assertEquals("The Test", matcher.matchByEpisodeIdentifier("The Test - 1x01"));
// real world test
assertEquals("Mushishi", matcher.matchBySeasonEpisodePattern("[niizk]_Mushishi_-_01_-_The_Green_Gathering"));
assertEquals("Mushishi", matcher.matchByEpisodeIdentifier("[niizk]_Mushishi_-_01_-_The_Green_Gathering"));
}