* improved auto-detection for date-based episodes
This commit is contained in:
parent
517fa36038
commit
b2fbba3a2d
|
@ -106,7 +106,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
||||||
|
|
||||||
for (File f : mediaFiles) {
|
for (File f : mediaFiles) {
|
||||||
// count SxE matches
|
// count episode identifier matches (SxE or date pattern)
|
||||||
if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) {
|
if (nameMatcher.matchByEpisodeIdentifier(f.getName()) != null) {
|
||||||
sxe++;
|
sxe++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
|
||||||
|
package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.regex.MatchResult;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import net.sourceforge.filebot.web.Date;
|
||||||
|
|
||||||
|
|
||||||
|
public class DateMatcher {
|
||||||
|
|
||||||
|
private final DatePattern[] patterns;
|
||||||
|
|
||||||
|
|
||||||
|
public DateMatcher() {
|
||||||
|
patterns = new DatePattern[2];
|
||||||
|
|
||||||
|
// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc.
|
||||||
|
patterns[0] = new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 });
|
||||||
|
|
||||||
|
// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc.
|
||||||
|
patterns[1] = new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 });
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public DateMatcher(DatePattern... patterns) {
|
||||||
|
this.patterns = patterns;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Date match(CharSequence seq) {
|
||||||
|
for (DatePattern pattern : patterns) {
|
||||||
|
Date match = pattern.match(seq);
|
||||||
|
|
||||||
|
if (match != null) {
|
||||||
|
return match;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public int find(CharSequence seq, int fromIndex) {
|
||||||
|
for (DatePattern pattern : patterns) {
|
||||||
|
int pos = pattern.find(seq, fromIndex);
|
||||||
|
|
||||||
|
if (pos >= 0) {
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static class DatePattern {
|
||||||
|
|
||||||
|
protected final Pattern pattern;
|
||||||
|
protected final int[] order;
|
||||||
|
|
||||||
|
|
||||||
|
public DatePattern(String pattern, int[] order) {
|
||||||
|
this.pattern = Pattern.compile(pattern);
|
||||||
|
this.order = order;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected Date process(MatchResult match) {
|
||||||
|
return new Date(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2])));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Date match(CharSequence seq) {
|
||||||
|
Matcher matcher = pattern.matcher(seq);
|
||||||
|
|
||||||
|
if (matcher.find()) {
|
||||||
|
return process(matcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public int find(CharSequence seq, int fromIndex) {
|
||||||
|
Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length());
|
||||||
|
|
||||||
|
if (matcher.find()) {
|
||||||
|
return matcher.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -3,26 +3,22 @@ package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.regex.MatchResult;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import net.sourceforge.filebot.web.Date;
|
import net.sourceforge.filebot.web.Date;
|
||||||
|
|
||||||
|
|
||||||
public class DateMetric implements SimilarityMetric {
|
public class DateMetric implements SimilarityMetric {
|
||||||
|
|
||||||
private final DatePattern[] patterns;
|
private final DateMatcher matcher;
|
||||||
|
|
||||||
|
|
||||||
public DateMetric() {
|
public DateMetric() {
|
||||||
patterns = new DatePattern[2];
|
this.matcher = new DateMatcher();
|
||||||
|
}
|
||||||
// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc.
|
|
||||||
patterns[0] = new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 });
|
|
||||||
|
public DateMetric(DateMatcher matcher) {
|
||||||
// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc.
|
this.matcher = matcher;
|
||||||
patterns[1] = new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -46,49 +42,7 @@ public class DateMetric implements SimilarityMetric {
|
||||||
object = ((File) object).getName();
|
object = ((File) object).getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
return match(object.toString());
|
return matcher.match(object.toString());
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public Date match(CharSequence name) {
|
|
||||||
for (DatePattern pattern : patterns) {
|
|
||||||
Date match = pattern.match(name);
|
|
||||||
|
|
||||||
if (match != null) {
|
|
||||||
return match;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected static class DatePattern {
|
|
||||||
|
|
||||||
protected final Pattern pattern;
|
|
||||||
protected final int[] order;
|
|
||||||
|
|
||||||
|
|
||||||
public DatePattern(String pattern, int[] order) {
|
|
||||||
this.pattern = Pattern.compile(pattern);
|
|
||||||
this.order = order;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected Date process(MatchResult match) {
|
|
||||||
return new Date(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2])));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public Date match(CharSequence name) {
|
|
||||||
Matcher matcher = pattern.matcher(name);
|
|
||||||
|
|
||||||
if (matcher.find()) {
|
|
||||||
return process(matcher);
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ public class SeasonEpisodeMatcher {
|
||||||
patterns[0] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](\\d{1,3})(?!\\p{Digit})");
|
patterns[0] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](\\d{1,3})(?!\\p{Digit})");
|
||||||
|
|
||||||
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ...
|
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ...
|
||||||
patterns[1] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})(\\d{1,2})[x.](\\d{2,3})(?!\\p{Digit})");
|
patterns[1] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[x.](\\d{2,3})(?!\\p{Digit})");
|
||||||
|
|
||||||
// match patterns like 01, 102, 1003 (enclosed in separators)
|
// match patterns like 01, 102, 1003 (enclosed in separators)
|
||||||
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})([0-1]?\\d?)(\\d{2})(?!\\p{Alnum})") {
|
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})([0-1]?\\d?)(\\d{2})(?!\\p{Alnum})") {
|
||||||
|
|
|
@ -31,6 +31,7 @@ import net.sourceforge.tuned.FileUtilities;
|
||||||
public class SeriesNameMatcher {
|
public class SeriesNameMatcher {
|
||||||
|
|
||||||
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
|
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
|
||||||
|
protected DateMatcher dateMatcher = new DateMatcher();
|
||||||
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
||||||
|
|
||||||
protected int commonWordSequenceMaxStartIndex;
|
protected int commonWordSequenceMaxStartIndex;
|
||||||
|
@ -83,12 +84,17 @@ public class SeriesNameMatcher {
|
||||||
// match common word sequences (likely series names)
|
// match common word sequences (likely series names)
|
||||||
SeriesNameCollection whitelist = new SeriesNameCollection();
|
SeriesNameCollection whitelist = new SeriesNameCollection();
|
||||||
|
|
||||||
// focus chars before the SxE pattern when matching by common word sequence
|
// focus chars before the SxE / Date pattern when matching by common word sequence
|
||||||
String[] focus = Arrays.copyOf(names, names.length);
|
String[] focus = Arrays.copyOf(names, names.length);
|
||||||
for (int i = 0; i < focus.length; i++) {
|
for (int i = 0; i < focus.length; i++) {
|
||||||
int pos = seasonEpisodeMatcher.find(focus[i], 0);
|
int sxePos = seasonEpisodeMatcher.find(focus[i], 0);
|
||||||
if (pos >= 0) {
|
if (sxePos >= 0) {
|
||||||
focus[i] = focus[i].substring(0, pos);
|
focus[i] = focus[i].substring(0, sxePos);
|
||||||
|
} else {
|
||||||
|
int datePos = dateMatcher.find(focus[i], 0);
|
||||||
|
if (datePos >= 0) {
|
||||||
|
focus[i] = focus[i].substring(0, datePos);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
whitelist.addAll(deepMatchAll(focus, threshold));
|
whitelist.addAll(deepMatchAll(focus, threshold));
|
||||||
|
@ -118,8 +124,9 @@ public class SeriesNameMatcher {
|
||||||
name = normalize(name);
|
name = normalize(name);
|
||||||
|
|
||||||
Matcher prefix = prefixPattern.matcher(name);
|
Matcher prefix = prefixPattern.matcher(name);
|
||||||
int sxePosition = seasonEpisodeMatcher.find(name, prefix.find() ? prefix.end() : 0);
|
int prefixEnd = prefix.find() ? prefix.end() : 0;
|
||||||
|
|
||||||
|
int sxePosition = seasonEpisodeMatcher.find(name, prefixEnd);
|
||||||
if (sxePosition > 0) {
|
if (sxePosition > 0) {
|
||||||
String hit = name.substring(0, sxePosition).trim();
|
String hit = name.substring(0, sxePosition).trim();
|
||||||
List<SxE> sxe = seasonEpisodeMatcher.match(name.substring(sxePosition));
|
List<SxE> sxe = seasonEpisodeMatcher.match(name.substring(sxePosition));
|
||||||
|
@ -131,7 +138,14 @@ public class SeriesNameMatcher {
|
||||||
// require multiple matches, if hit might be a false match
|
// require multiple matches, if hit might be a false match
|
||||||
thresholdCollection.add(hit);
|
thresholdCollection.add(hit);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// try date pattern as fallback
|
||||||
|
int datePosition = dateMatcher.find(name, prefixEnd);
|
||||||
|
if (datePosition > 0) {
|
||||||
|
thresholdCollection.addDirect(name.substring(0, datePosition).trim());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return thresholdCollection;
|
return thresholdCollection;
|
||||||
|
@ -176,14 +190,19 @@ public class SeriesNameMatcher {
|
||||||
* @return a substring of the given name that ends before the first occurrence of a season
|
* @return a substring of the given name that ends before the first occurrence of a season
|
||||||
* episode pattern, or null if there is no such pattern
|
* episode pattern or date pattern, or null if there is no such pattern
|
||||||
*/
|
*/
|
||||||
public String matchBySeasonEpisodePattern(String name) {
|
public String matchByEpisodeIdentifier(String name) {
|
||||||
int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
|
int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
|
||||||
|
|
||||||
if (seasonEpisodePosition > 0) {
|
if (seasonEpisodePosition > 0) {
|
||||||
// series name ends at the first season episode pattern
|
// series name ends at the first season episode pattern
|
||||||
return normalize(name.substring(0, seasonEpisodePosition));
|
return normalize(name.substring(0, seasonEpisodePosition));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int datePosition = dateMatcher.find(name, 0);
|
||||||
|
if (datePosition > 0) {
|
||||||
|
// series name ends at the first date pattern
|
||||||
|
return normalize(name.substring(0, datePosition));
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -233,7 +233,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
||||||
|
|
||||||
// require user input if auto-detection has failed or has been disabled
|
// require user input if auto-detection has failed or has been disabled
|
||||||
if (episodes.isEmpty()) {
|
if (episodes.isEmpty()) {
|
||||||
String suggestion = new SeriesNameMatcher().matchBySeasonEpisodePattern(getName(files.get(0)));
|
String suggestion = new SeriesNameMatcher().matchByEpisodeIdentifier(getName(files.get(0)));
|
||||||
if (suggestion != null) {
|
if (suggestion != null) {
|
||||||
// clean media info / release group info / etc
|
// clean media info / release group info / etc
|
||||||
suggestion = stripReleaseInfo(suggestion);
|
suggestion = stripReleaseInfo(suggestion);
|
||||||
|
|
|
@ -34,10 +34,10 @@ public class SeriesNameMatcherTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void matchBeforeSeasonEpisodePattern() {
|
public void matchBeforeSeasonEpisodePattern() {
|
||||||
assertEquals("The Test", matcher.matchBySeasonEpisodePattern("The Test - 1x01"));
|
assertEquals("The Test", matcher.matchByEpisodeIdentifier("The Test - 1x01"));
|
||||||
|
|
||||||
// real world test
|
// real world test
|
||||||
assertEquals("Mushishi", matcher.matchBySeasonEpisodePattern("[niizk]_Mushishi_-_01_-_The_Green_Gathering"));
|
assertEquals("Mushishi", matcher.matchByEpisodeIdentifier("[niizk]_Mushishi_-_01_-_The_Green_Gathering"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue