* try improving support for multi-episodes while trying to not break anything else

This commit is contained in:
Reinhard Pointner 2013-09-18 05:02:55 +00:00
parent 247d6cbe22
commit db11b488c5
2 changed files with 93 additions and 114 deletions

View File

@ -1,7 +1,5 @@
package net.sourceforge.filebot.similarity; package net.sourceforge.filebot.similarity;
import static java.util.Collections.*; import static java.util.Collections.*;
import java.io.File; import java.io.File;
@ -19,15 +17,13 @@ import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.MultiEpisode; import net.sourceforge.filebot.web.MultiEpisode;
public class EpisodeMatcher extends Matcher<File, Object> { public class EpisodeMatcher extends Matcher<File, Object> {
/**
 * Create a matcher for the given files and episode candidates.
 *
 * @param values
 *            files that should be matched
 * @param candidates
 *            episodes the files are matched against
 * @param strict
 *            passed on to the parent matcher; also selects the strict metric sequence instead of the default one
 */
public EpisodeMatcher(Collection<File> values, Collection<Episode> candidates, boolean strict) {
    // the strict metric sequence is used in strict mode so that a result is forced from the final top similarity set
    super(
            values,
            candidates,
            strict,
            !strict
                    ? EpisodeMetrics.defaultSequence(false)
                    : StrictEpisodeMetrics.defaultSequence(false));
}
@Override @Override
protected void deepMatch(Collection<Match<File, Object>> possibleMatches, int level) throws InterruptedException { protected void deepMatch(Collection<Match<File, Object>> possibleMatches, int level) throws InterruptedException {
Map<File, List<Episode>> episodeSets = new IdentityHashMap<File, List<Episode>>(); Map<File, List<Episode>> episodeSets = new IdentityHashMap<File, List<Episode>>();
@ -39,7 +35,7 @@ public class EpisodeMatcher extends Matcher<File, Object> {
} }
episodes.add((Episode) it.getCandidate()); episodes.add((Episode) it.getCandidate());
} }
Map<File, Set<SxE>> episodeIdentifierSets = new IdentityHashMap<File, Set<SxE>>(); Map<File, Set<SxE>> episodeIdentifierSets = new IdentityHashMap<File, Set<SxE>>();
for (Entry<File, List<Episode>> it : episodeSets.entrySet()) { for (Entry<File, List<Episode>> it : episodeSets.entrySet()) {
Set<SxE> sxe = new HashSet<SxE>(it.getValue().size()); Set<SxE> sxe = new HashSet<SxE>(it.getValue().size());
@ -48,16 +44,16 @@ public class EpisodeMatcher extends Matcher<File, Object> {
} }
episodeIdentifierSets.put(it.getKey(), sxe); episodeIdentifierSets.put(it.getKey(), sxe);
} }
boolean modified = false; boolean modified = false;
for (Match<File, Object> it : possibleMatches) { for (Match<File, Object> it : possibleMatches) {
File file = it.getValue(); File file = it.getValue();
Set<SxE> uniqueFiles = parseEpisodeIdentifer(file); Set<SxE> uniqueFiles = parseEpisodeIdentifer(file);
Set<SxE> uniqueEpisodes = episodeIdentifierSets.get(file); Set<SxE> uniqueEpisodes = episodeIdentifierSets.get(file);
if (uniqueFiles.equals(uniqueEpisodes)) { if (uniqueFiles.equals(uniqueEpisodes)) {
Episode[] episodes = episodeSets.get(file).toArray(new Episode[0]); Episode[] episodes = episodeSets.get(file).toArray(new Episode[0]);
if (isMultiEpisode(episodes)) { if (isMultiEpisode(episodes)) {
MultiEpisode episode = new MultiEpisode(episodes); MultiEpisode episode = new MultiEpisode(episodes);
disjointMatchCollection.add(new Match<File, Object>(file, episode)); disjointMatchCollection.add(new Match<File, Object>(file, episode));
@ -65,57 +61,55 @@ public class EpisodeMatcher extends Matcher<File, Object> {
} }
} }
} }
if (modified) { if (modified) {
removeCollected(possibleMatches); removeCollected(possibleMatches);
} }
super.deepMatch(possibleMatches, level); super.deepMatch(possibleMatches, level);
} }
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true); private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, false);
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new HashMap<File, Set<SxE>>(64, 4)); private final Map<File, Set<SxE>> transformCache = synchronizedMap(new HashMap<File, Set<SxE>>(64, 4));
private Set<SxE> parseEpisodeIdentifer(File file) { private Set<SxE> parseEpisodeIdentifer(File file) {
Set<SxE> result = transformCache.get(file); Set<SxE> result = transformCache.get(file);
if (result != null) { if (result != null) {
return result; return result;
} }
List<SxE> sxe = seasonEpisodeMatcher.match(file.getName()); List<SxE> sxe = seasonEpisodeMatcher.match(file.getName());
if (sxe != null) { if (sxe != null) {
result = new HashSet<SxE>(sxe); result = new HashSet<SxE>(sxe);
} else { } else {
result = emptySet(); result = emptySet();
} }
transformCache.put(file, result); transformCache.put(file, result);
return result; return result;
} }
private boolean isMultiEpisode(Episode[] episodes) { private boolean isMultiEpisode(Episode[] episodes) {
// check episode sequence integrity // check episode sequence integrity
Integer seqIndex = null; Integer seqIndex = null;
for (Episode ep : episodes) { for (Episode ep : episodes) {
if (seqIndex != null && !ep.getEpisode().equals(seqIndex + 1)) if (seqIndex != null && !ep.getEpisode().equals(seqIndex + 1))
return false; return false;
seqIndex = ep.getEpisode(); seqIndex = ep.getEpisode();
} }
// check drill-down integrity // check drill-down integrity
String seriesName = null; String seriesName = null;
for (Episode ep : episodes) { for (Episode ep : episodes) {
if (seriesName != null && !seriesName.equals(ep.getSeriesName())) if (seriesName != null && !seriesName.equals(ep.getSeriesName()))
return false; return false;
seriesName = ep.getSeriesName(); seriesName = ep.getSeriesName();
} }
return true; return true;
} }
} }

View File

@ -1,8 +1,5 @@
package net.sourceforge.filebot.similarity; package net.sourceforge.filebot.similarity;
import static java.util.Arrays.*;
import static java.util.Collections.*; import static java.util.Collections.*;
import static java.util.regex.Pattern.*; import static java.util.regex.Pattern.*;
@ -10,30 +7,30 @@ import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Scanner; import java.util.Scanner;
import java.util.Set;
import java.util.regex.MatchResult; import java.util.regex.MatchResult;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class SeasonEpisodeMatcher { public class SeasonEpisodeMatcher {
public static final SeasonEpisodeFilter DEFAULT_SANITY = new SeasonEpisodeFilter(50, 50, 1000); public static final SeasonEpisodeFilter DEFAULT_SANITY = new SeasonEpisodeFilter(50, 50, 1000);
private SeasonEpisodePattern[] patterns; private SeasonEpisodePattern[] patterns;
private Pattern seasonPattern; private Pattern seasonPattern;
public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity, boolean strict) { public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity, boolean strict) {
patterns = new SeasonEpisodePattern[5]; patterns = new SeasonEpisodePattern[5];
// match patterns like Season 01 Episode 02, ... // match patterns like Season 01 Episode 02, ...
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})"); patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ... // match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee][Pp]?(((?<=[^._ ])[Ee]?[Pp]?\\d{1,3}(\\D|$))+)") { patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee][Pp]?(((?<=[^._ ])[Ee]?[Pp]?\\d{1,3}(\\D|$))+)") {
@Override @Override
protected Collection<SxE> process(MatchResult match) { protected Collection<SxE> process(MatchResult match) {
List<SxE> matches = new ArrayList<SxE>(2); List<SxE> matches = new ArrayList<SxE>(2);
@ -44,10 +41,10 @@ public class SeasonEpisodeMatcher {
return matches; return matches;
} }
}; };
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ... 1x01-02-03-04, 1x01x02x03x04 ... // match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ... 1x01-02-03-04, 1x01x02x03x04 ...
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[xe.](((?<=[^._ ])\\d{2,3}(\\D|$))+)") { patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[xe.](((?<=[^._ ])\\d{2,3}(\\D|$))+)") {
@Override @Override
protected Collection<SxE> process(MatchResult match) { protected Collection<SxE> process(MatchResult match) {
List<SxE> matches = new ArrayList<SxE>(2); List<SxE> matches = new ArrayList<SxE>(2);
@ -58,54 +55,61 @@ public class SeasonEpisodeMatcher {
return matches; return matches;
} }
}; };
// match patterns like ep1, ep.1, ... // match patterns like ep1, ep.1, ...
patterns[3] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})(?i:e|ep|episode)[^\\p{Alnum}]{0,3}(\\d{1,3})(?!\\p{Digit})") { patterns[3] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})(?i:e|ep|episode)[^\\p{Alnum}]{0,3}(\\d{1,3})(?!\\p{Digit})") {
@Override @Override
protected Collection<SxE> process(MatchResult match) { protected Collection<SxE> process(MatchResult match) {
// regex doesn't match season // regex doesn't match season
return singleton(new SxE(null, match.group(1))); return singleton(new SxE(null, match.group(1)));
} }
}; };
// match patterns like 01, 102, 1003 (enclosed in separators) // match patterns like 01, 102, 1003, 10102 (enclosed in separators)
patterns[4] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})([0-2]?\\d?)(\\d{2})(?!\\p{Alnum})") { patterns[4] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})([0-2]?\\d?)(\\d{2})(\\d{2})?(?!\\p{Alnum})") {
@Override @Override
protected Collection<SxE> process(MatchResult match) { protected Collection<SxE> process(MatchResult match) {
Set<SxE> sxe = new LinkedHashSet<SxE>(2);
// interpret match as season and episode // interpret match as season and episode
SxE seasonEpisode = new SxE(match.group(1), match.group(2)); for (int i = 2; i <= match.groupCount(); i++) {
if (match.group(i) != null) {
// interpret match as episode number only sxe.add(new SxE(match.group(1), match.group(i)));
SxE absoluteEpisode = new SxE(null, match.group(1) + match.group(2)); }
}
// interpret match both ways, as SxE match as well as episode number only match if it's not an double episode
if (sxe.size() < 2) {
sxe.add(new SxE(null, match.group(1) + match.group(2)));
}
// return both matches, unless they are one and the same // return both matches, unless they are one and the same
return seasonEpisode.equals(absoluteEpisode) ? singleton(seasonEpisode) : asList(seasonEpisode, absoluteEpisode); return sxe;
} }
}; };
// only use S00E00 and SxE pattern in strict mode // only use S00E00 and SxE pattern in strict mode
if (strict) { if (strict) {
patterns = new SeasonEpisodePattern[] { patterns[0], patterns[1], patterns[2] }; patterns = new SeasonEpisodePattern[] { patterns[0], patterns[1], patterns[2] };
} }
// season folder pattern for complementing partial sxe info from filename // season folder pattern for complementing partial sxe info from filename
seasonPattern = compile("Season[-._ ]?(\\d{1,2})", CASE_INSENSITIVE | UNICODE_CASE); seasonPattern = compile("Season[-._ ]?(\\d{1,2})", CASE_INSENSITIVE | UNICODE_CASE);
} }
/** /**
* Try to get season and episode numbers for the given string. * Try to get season and episode numbers for the given string.
* *
* @param name match this string against the a set of know patterns * @param name
* @return the matches returned by the first pattern that returns any matches for this * match this string against the a set of know patterns
* string, or null if no pattern returned any matches * @return the matches returned by the first pattern that returns any matches for this string, or null if no pattern returned any matches
*/ */
public List<SxE> match(CharSequence name) { public List<SxE> match(CharSequence name) {
for (SeasonEpisodePattern pattern : patterns) { for (SeasonEpisodePattern pattern : patterns) {
List<SxE> match = pattern.match(name); List<SxE> match = pattern.match(name);
if (!match.isEmpty()) { if (!match.isEmpty()) {
// current pattern did match // current pattern did match
return match; return match;
@ -113,12 +117,11 @@ public class SeasonEpisodeMatcher {
} }
return null; return null;
} }
public List<SxE> match(File file) { public List<SxE> match(File file) {
for (SeasonEpisodePattern pattern : patterns) { for (SeasonEpisodePattern pattern : patterns) {
List<SxE> match = pattern.match(file.getName()); List<SxE> match = pattern.match(file.getName());
if (!match.isEmpty()) { if (!match.isEmpty()) {
// current pattern did match // current pattern did match
for (int i = 0; i < match.size(); i++) { for (int i = 0; i < match.size(); i++) {
@ -134,57 +137,51 @@ public class SeasonEpisodeMatcher {
} }
return null; return null;
} }
public int find(CharSequence name, int fromIndex) { public int find(CharSequence name, int fromIndex) {
for (SeasonEpisodePattern pattern : patterns) { for (SeasonEpisodePattern pattern : patterns) {
int index = pattern.find(name, fromIndex); int index = pattern.find(name, fromIndex);
if (index >= 0) { if (index >= 0) {
// current pattern did match // current pattern did match
return index; return index;
} }
} }
return -1; return -1;
} }
public Matcher matcher(CharSequence name) { public Matcher matcher(CharSequence name) {
for (SeasonEpisodePattern pattern : patterns) { for (SeasonEpisodePattern pattern : patterns) {
Matcher matcher = pattern.matcher(name); Matcher matcher = pattern.matcher(name);
// check if current pattern matches // check if current pattern matches
if (matcher.find()) { if (matcher.find()) {
// reset matcher state // reset matcher state
return matcher.reset(); return matcher.reset();
} }
} }
return null; return null;
} }
public static class SxE { public static class SxE {
public static final int UNDEFINED = -1; public static final int UNDEFINED = -1;
public final int season; public final int season;
public final int episode; public final int episode;
/**
 * Create an identifier from boxed numbers.
 *
 * @param season
 *            season number, or null to store {@link #UNDEFINED}
 * @param episode
 *            episode number, or null to store {@link #UNDEFINED}
 */
public SxE(Integer season, Integer episode) {
    if (season == null) {
        this.season = UNDEFINED;
    } else {
        this.season = season;
    }

    if (episode == null) {
        this.episode = UNDEFINED;
    } else {
        this.episode = episode;
    }
}
public SxE(String season, String episode) { public SxE(String season, String episode) {
this.season = parse(season); this.season = parse(season);
this.episode = parse(episode); this.episode = parse(episode);
} }
protected int parse(String number) { protected int parse(String number) {
try { try {
return Integer.parseInt(number); return Integer.parseInt(number);
@ -192,80 +189,69 @@ public class SeasonEpisodeMatcher {
return UNDEFINED; return UNDEFINED;
} }
} }
@Override @Override
public boolean equals(Object object) { public boolean equals(Object object) {
if (object instanceof SxE) { if (object instanceof SxE) {
SxE other = (SxE) object; SxE other = (SxE) object;
return this.season == other.season && this.episode == other.episode; return this.season == other.season && this.episode == other.episode;
} }
return false; return false;
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Arrays.hashCode(new Object[] { season, episode }); return Arrays.hashCode(new Object[] { season, episode });
} }
@Override @Override
public String toString() { public String toString() {
return season >= 0 ? String.format("%dx%02d", season, episode) : String.format("%02d", episode); return season >= 0 ? String.format("%dx%02d", season, episode) : String.format("%02d", episode);
} }
} }
public static class SeasonEpisodeFilter { public static class SeasonEpisodeFilter {
public final int seasonLimit; public final int seasonLimit;
public final int seasonEpisodeLimit; public final int seasonEpisodeLimit;
public final int absoluteEpisodeLimit; public final int absoluteEpisodeLimit;
public SeasonEpisodeFilter(int seasonLimit, int seasonEpisodeLimit, int absoluteEpisodeLimit) { public SeasonEpisodeFilter(int seasonLimit, int seasonEpisodeLimit, int absoluteEpisodeLimit) {
this.seasonLimit = seasonLimit; this.seasonLimit = seasonLimit;
this.seasonEpisodeLimit = seasonEpisodeLimit; this.seasonEpisodeLimit = seasonEpisodeLimit;
this.absoluteEpisodeLimit = absoluteEpisodeLimit; this.absoluteEpisodeLimit = absoluteEpisodeLimit;
} }
boolean filter(SxE sxe) { boolean filter(SxE sxe) {
return (sxe.season >= 0 && sxe.season < seasonLimit && sxe.episode < seasonEpisodeLimit) || (sxe.season < 0 && sxe.episode < absoluteEpisodeLimit); return (sxe.season >= 0 && sxe.season < seasonLimit && sxe.episode < seasonEpisodeLimit) || (sxe.season < 0 && sxe.episode < absoluteEpisodeLimit);
} }
} }
public static class SeasonEpisodePattern { public static class SeasonEpisodePattern {
protected final Pattern pattern; protected final Pattern pattern;
protected final SeasonEpisodeFilter sanity; protected final SeasonEpisodeFilter sanity;
/**
 * Create a pattern from a regular expression and an optional sanity filter.
 *
 * @param sanity
 *            filter to store with this pattern; null is accepted
 * @param pattern
 *            regular expression source, compiled without flags
 */
public SeasonEpisodePattern(SeasonEpisodeFilter sanity, String pattern) {
    Pattern compiled = Pattern.compile(pattern);

    this.pattern = compiled;
    this.sanity = sanity;
}
public Matcher matcher(CharSequence name) { public Matcher matcher(CharSequence name) {
return pattern.matcher(name); return pattern.matcher(name);
} }
protected Collection<SxE> process(MatchResult match) { protected Collection<SxE> process(MatchResult match) {
return singleton(new SxE(match.group(1), match.group(2))); return singleton(new SxE(match.group(1), match.group(2)));
} }
public List<SxE> match(CharSequence name) { public List<SxE> match(CharSequence name) {
// name will probably contain no more than two matches // name will probably contain no more than two matches
List<SxE> matches = new ArrayList<SxE>(2); List<SxE> matches = new ArrayList<SxE>(2);
Matcher matcher = matcher(name); Matcher matcher = matcher(name);
while (matcher.find()) { while (matcher.find()) {
for (SxE value : process(matcher)) { for (SxE value : process(matcher)) {
if (sanity == null || sanity.filter(value)) { if (sanity == null || sanity.filter(value)) {
@ -273,14 +259,13 @@ public class SeasonEpisodeMatcher {
} }
} }
} }
return matches; return matches;
} }
public int find(CharSequence name, int fromIndex) { public int find(CharSequence name, int fromIndex) {
Matcher matcher = matcher(name).region(fromIndex, name.length()); Matcher matcher = matcher(name).region(fromIndex, name.length());
while (matcher.find()) { while (matcher.find()) {
for (SxE value : process(matcher)) { for (SxE value : process(matcher)) {
if (sanity == null || sanity.filter(value)) { if (sanity == null || sanity.filter(value)) {
@ -288,9 +273,9 @@ public class SeasonEpisodeMatcher {
} }
} }
} }
return -1; return -1;
} }
} }
} }