+ experimental support for multi-episode files

This commit is contained in:
Reinhard Pointner 2012-03-17 19:02:04 +00:00
parent c739552c9a
commit 677ac82d58
11 changed files with 298 additions and 38 deletions

View File

@ -49,6 +49,7 @@ import net.sourceforge.filebot.hash.HashType;
import net.sourceforge.filebot.hash.VerificationFileReader;
import net.sourceforge.filebot.hash.VerificationFileWriter;
import net.sourceforge.filebot.media.ReleaseInfo;
import net.sourceforge.filebot.similarity.EpisodeMatcher;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
@ -56,7 +57,6 @@ import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityComparator;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.similarity.StrictEpisodeMetrics;
import net.sourceforge.filebot.subtitle.SubtitleFormat;
import net.sourceforge.filebot.ui.Language;
import net.sourceforge.filebot.ui.rename.HistorySpooler;
@ -142,8 +142,7 @@ public class CmdlineOperations implements CmdlineInterface {
List<File> mediaFiles = filter(files, VIDEO_FILES, SUBTITLE_FILES);
// similarity metrics for matching
SimilarityMetric[] sequence = strict ? StrictEpisodeMetrics.defaultSequence(false) : EpisodeMetrics.defaultSequence(false);
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
List<Match<File, Object>> matches = new ArrayList<Match<File, Object>>();
// auto-determine optimal batch sets
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles, locale).entrySet()) {
@ -169,8 +168,8 @@ public class CmdlineOperations implements CmdlineInterface {
Set<Episode> episodes = fetchEpisodeSet(db, seriesNames, sortOrder, locale, strict);
if (episodes.size() > 0) {
matches.addAll(matchEpisodes(filter(batch, VIDEO_FILES), episodes, sequence));
matches.addAll(matchEpisodes(filter(batch, SUBTITLE_FILES), episodes, sequence));
matches.addAll(matchEpisodes(filter(batch, VIDEO_FILES), episodes, strict));
matches.addAll(matchEpisodes(filter(batch, SUBTITLE_FILES), episodes, strict));
} else {
CLILogger.warning("Failed to fetch episode data: " + seriesNames);
}
@ -184,9 +183,9 @@ public class CmdlineOperations implements CmdlineInterface {
// map old files to new paths by applying formatting and validating filenames
Map<File, File> renameMap = new LinkedHashMap<File, File>();
for (Match<File, Episode> match : matches) {
for (Match<File, Object> match : matches) {
File file = match.getValue();
Episode episode = match.getCandidate();
Object episode = match.getCandidate();
String newName = (format != null) ? format.format(new MediaBindingBean(episode, file)) : validateFileName(EpisodeFormat.SeasonEpisode.format(episode));
File newFile = new File(outputDir, newName + "." + getExtension(file));
@ -204,10 +203,10 @@ public class CmdlineOperations implements CmdlineInterface {
}
private List<Match<File, Episode>> matchEpisodes(Collection<File> files, Collection<Episode> episodes, SimilarityMetric[] sequence) throws Exception {
private List<Match<File, Object>> matchEpisodes(Collection<File> files, Collection<Episode> episodes, boolean strict) throws Exception {
// always use strict fail-fast matcher
Matcher<File, Episode> matcher = new Matcher<File, Episode>(files, episodes, true, sequence);
List<Match<File, Episode>> matches = matcher.match();
EpisodeMatcher matcher = new EpisodeMatcher(files, episodes, strict);
List<Match<File, Object>> matches = matcher.match();
for (File failedMatch : matcher.remainingValues()) {
CLILogger.warning("No matching episode: " + failedMatch.getName());

View File

@ -2,16 +2,22 @@
package net.sourceforge.filebot.format;
import static java.util.Arrays.*;
import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.format.Define.*;
import static net.sourceforge.filebot.hash.VerificationUtilities.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.filebot.web.EpisodeFormat.*;
import static net.sourceforge.tuned.FileUtilities.*;
import static net.sourceforge.tuned.StringUtilities.*;
import java.io.File;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
@ -25,6 +31,7 @@ import net.sourceforge.filebot.web.Date;
import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.MoviePart;
import net.sourceforge.filebot.web.MultiEpisode;
import net.sourceforge.filebot.web.SortOrder;
import net.sourceforge.tuned.FileUtilities;
@ -98,7 +105,11 @@ public class MediaBindingBean {
@Define("t")
public String getTitle() {
return getEpisode().getTitle();
Set<String> title = new LinkedHashSet<String>();
for (Episode it : getEpisodes()) {
title.add(removeTrailingBrackets(it.getTitle()));
}
return join(title, " & ");
}
@ -352,7 +363,7 @@ public class MediaBindingBean {
}
@Define("episodes")
@Define("episodelist")
public Object getEpisodeList() throws Exception {
return WebServices.TheTVDB.getEpisodeList(WebServices.TheTVDB.search(getEpisode().getSeriesName()).get(0), SortOrder.Airdate, Locale.ENGLISH);
}
@ -388,6 +399,12 @@ public class MediaBindingBean {
}
/**
 * Exposes the bound info object as a list of episodes.
 *
 * @return the wrapped episodes when the info object is a {@link MultiEpisode},
 *         otherwise a one-element list holding {@link #getEpisode()}
 */
@Define("episodes")
public List<Episode> getEpisodes() {
    // multi-episode matches already carry their own episode list
    if (infoObject instanceof MultiEpisode) {
        MultiEpisode multi = (MultiEpisode) infoObject;
        return multi.getEpisodes();
    }

    // plain episode: wrap it so callers can always iterate
    return asList(getEpisode());
}
@Define("movie")
public Movie getMovie() {
return (Movie) infoObject;

View File

@ -0,0 +1,89 @@
package net.sourceforge.filebot.similarity;
import static java.util.Collections.*;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.WeakHashMap;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.MultiEpisode;
/**
 * Matcher between files and episodes that can collapse several episode
 * candidates for one file into a single {@link MultiEpisode} match.
 *
 * A file is treated this way when the set of season/episode identifiers parsed
 * from its name is equal to the set of season/episode numbers of all episodes
 * that are currently possible matches for it (this includes the case of exactly
 * one identifier and one episode).
 */
public class EpisodeMatcher extends Matcher<File, Object> {

    // parses season/episode identifiers out of file names
    private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);

    // memoizes the identifiers parsed per file; weakly keyed and wrapped in a synchronized map
    private final Map<File, Set<SxE>> transformCache = synchronizedMap(new WeakHashMap<File, Set<SxE>>(64, 4));


    /**
     * @param values files to be matched
     * @param candidates episodes the files are matched against
     * @param strictMetrics selects {@code StrictEpisodeMetrics} instead of {@code EpisodeMetrics} as the metric sequence
     */
    public EpisodeMatcher(Collection<File> values, Collection<Episode> candidates, boolean strictMetrics) {
        // the matcher's own "strict" flag is always passed as true; only the metric sequence varies
        super(values, candidates, true, strictMetrics ? StrictEpisodeMetrics.defaultSequence(false) : EpisodeMetrics.defaultSequence(false));
    }


    /**
     * Resolves multi-episode files before delegating to the default implementation.
     *
     * Every possible match whose file name identifiers equal the identifiers of
     * all episodes grouped under that file is turned into a {@link MultiEpisode}
     * match, handed to {@code disjointMatchCollection} and removed from
     * {@code possibleMatches}. Whatever is left is processed by
     * {@code super.deepMatch}.
     */
    @Override
    protected void deepMatch(Collection<Match<File, Object>> possibleMatches, int level) throws InterruptedException {
        Map<File, List<Episode>> episodesByFile = groupEpisodesByFile(possibleMatches);
        Map<File, Set<SxE>> identifiersByFile = toIdentifierSets(episodesByFile);

        Iterator<Match<File, Object>> remaining = possibleMatches.iterator();
        while (remaining.hasNext()) {
            File file = remaining.next().getValue();

            Set<SxE> fromFileName = parseEpisodeIdentifiers(file);
            Set<SxE> fromEpisodes = identifiersByFile.get(file);

            if (!fromFileName.equals(fromEpisodes)) {
                continue; // leave this match to the default algorithm
            }

            Episode[] parts = episodesByFile.get(file).toArray(new Episode[0]);
            disjointMatchCollection.add(new Match<File, Object>(file, new MultiEpisode(parts)));
            remaining.remove();
        }

        super.deepMatch(possibleMatches, level);
    }


    /**
     * Collects, per file (compared by identity), the episodes of all possible matches in encounter order.
     */
    private static Map<File, List<Episode>> groupEpisodesByFile(Collection<Match<File, Object>> possibleMatches) {
        Map<File, List<Episode>> grouped = new IdentityHashMap<File, List<Episode>>();

        for (Match<File, Object> match : possibleMatches) {
            File file = match.getValue();
            List<Episode> bucket = grouped.get(file);
            if (bucket == null) {
                bucket = new ArrayList<Episode>();
                grouped.put(file, bucket);
            }
            bucket.add((Episode) match.getCandidate());
        }

        return grouped;
    }


    /**
     * Converts each file's episode list into the set of its season/episode identifiers.
     */
    private static Map<File, Set<SxE>> toIdentifierSets(Map<File, List<Episode>> episodesByFile) {
        Map<File, Set<SxE>> identifiers = new IdentityHashMap<File, Set<SxE>>();

        for (Entry<File, List<Episode>> entry : episodesByFile.entrySet()) {
            List<Episode> episodes = entry.getValue();
            Set<SxE> numbers = new HashSet<SxE>(episodes.size());
            for (Episode episode : episodes) {
                numbers.add(new SxE(episode.getSeason(), episode.getEpisode()));
            }
            identifiers.put(entry.getKey(), numbers);
        }

        return identifiers;
    }


    /**
     * Returns the identifiers found in the file's name, or an empty set when
     * the name matcher yields {@code null}; results are cached per file.
     */
    private Set<SxE> parseEpisodeIdentifiers(File file) {
        Set<SxE> cached = transformCache.get(file);
        if (cached != null) {
            return cached;
        }

        List<SxE> parsed = seasonEpisodeMatcher.match(file.getName());
        Set<SxE> identifiers = (parsed != null) ? new HashSet<SxE>(parsed) : Collections.<SxE> emptySet();

        transformCache.put(file, identifiers);
        return identifiers;
    }

}

View File

@ -23,13 +23,13 @@ import java.util.TreeMap;
public class Matcher<V, C> {
private final List<V> values;
private final List<C> candidates;
protected final List<V> values;
protected final List<C> candidates;
private final boolean strict;
private final SimilarityMetric[] metrics;
protected final boolean strict;
protected final SimilarityMetric[] metrics;
private final DisjointMatchCollection<V, C> disjointMatchCollection;
protected final DisjointMatchCollection<V, C> disjointMatchCollection;
public Matcher(Collection<? extends V> values, Collection<? extends C> candidates, boolean strict, SimilarityMetric[] metrics) {

View File

@ -40,4 +40,10 @@ public class Normalization {
return checksum.matcher(string).replaceAll("");
}
/**
 * Removes a single parenthesized group at the end of the given name,
 * e.g. Doctor Who (2005) -> Doctor Who, and trims the result.
 *
 * Whitespace after the closing parenthesis is tolerated, so
 * "Doctor Who (2005) " is cleaned up as well. Parenthesized groups that are
 * not at the end of the name are left untouched.
 *
 * @param name the name to clean up, must not be null
 * @return the name without the trailing parenthesized group, trimmed
 */
public static String removeTrailingBrackets(String name) {
    // the end anchor is evaluated before trim(), so trailing whitespace has to be part of the pattern
    return name.replaceAll("[(]([^)]*)[)]\\s*$", "").trim();
}
}

View File

@ -9,6 +9,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -27,11 +28,33 @@ public class SeasonEpisodeMatcher {
// match patterns like Season 01 Episode 02, ...
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ...
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](\\d{1,3})(?!\\p{Digit})");
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee]((\\d{1,2}\\D?)+)(?!\\p{Digit})") {
@Override
protected Collection<SxE> process(MatchResult match) {
List<SxE> matches = new ArrayList<SxE>(2);
Scanner epno = new Scanner(match.group(2)).useDelimiter("\\D+");
while (epno.hasNext()) {
matches.add(new SxE(match.group(1), epno.next()));
}
return matches;
}
};
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ...
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[x.](\\d{2,3})(?!\\p{Digit})");
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ... 1x01-02-03-04, 1x01x02x03x04 ...
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[x.]((\\d{2,3}\\D?)+)(?!\\p{Digit})") {
@Override
protected Collection<SxE> process(MatchResult match) {
List<SxE> matches = new ArrayList<SxE>(2);
Scanner epno = new Scanner(match.group(2)).useDelimiter("\\D+");
while (epno.hasNext()) {
matches.add(new SxE(match.group(1), epno.next()));
}
return matches;
}
};
// match patterns like ep1, ep.1, ...
patterns[3] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})(?i:ep|episode)[^\\p{Alnum}]{0,3}(\\d{1,3})(?!\\p{Digit})") {

View File

@ -5,7 +5,7 @@ package net.sourceforge.filebot.ui.rename;
import static java.util.Collections.*;
import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.media.MediaDetection.*;
import static net.sourceforge.filebot.web.EpisodeUtilities.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.tuned.FileUtilities.*;
import static net.sourceforge.tuned.ui.TunedUtilities.*;
@ -35,9 +35,8 @@ import javax.swing.Action;
import javax.swing.SwingUtilities;
import net.sourceforge.filebot.Analytics;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.EpisodeMatcher;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityMetric;
@ -289,7 +288,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
// group by subtitles first and then by files in general
for (List<File> filesPerType : mapByExtension(files).values()) {
Matcher<File, Episode> matcher = new Matcher<File, Episode>(filesPerType, episodes, false, EpisodeMetrics.defaultSequence(false));
EpisodeMatcher matcher = new EpisodeMatcher(filesPerType, episodes, false);
matches.addAll(matcher.match());
}

View File

@ -2,10 +2,14 @@
package net.sourceforge.filebot.web;
import static net.sourceforge.tuned.StringUtilities.*;
import java.text.FieldPosition;
import java.text.Format;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -17,15 +21,19 @@ public class EpisodeFormat extends Format {
private final boolean includeAirdate;
private final boolean includeSpecial;
/**
 * Creates a format configured by the two given flags, which are stored as-is.
 *
 * NOTE(review): the flags presumably decide whether special-episode and
 * airdate information appear in the formatted text; the code using them is
 * outside this view, so confirm there.
 *
 * @param includeSpecial value for the {@code includeSpecial} option
 * @param includeAirdate value for the {@code includeAirdate} option
 */
public EpisodeFormat(boolean includeSpecial, boolean includeAirdate) {
    this.includeAirdate = includeAirdate;
    this.includeSpecial = includeSpecial;
}
@Override
public StringBuffer format(Object obj, StringBuffer sb, FieldPosition pos) {
if (obj instanceof MultiEpisode) {
return sb.append(formatMultiEpisode(((MultiEpisode) obj).getEpisodes()));
}
// format episode object, e.g. Dark Angel - 3x01 - Labyrinth [2009-06-01]
Episode episode = (Episode) obj;
@ -58,8 +66,12 @@ public class EpisodeFormat extends Format {
return sb;
}
public String formatSxE(Episode episode) {
if (episode instanceof MultiEpisode) {
return formatMultiSxE(((MultiEpisode) episode).getEpisodes());
}
StringBuilder sb = new StringBuilder();
if (episode.getSeason() != null) {
@ -75,8 +87,12 @@ public class EpisodeFormat extends Format {
return sb.toString();
}
public String formatS00E00(Episode episode) {
if (episode instanceof MultiEpisode) {
return formatMultiS00E00(((MultiEpisode) episode).getEpisodes());
}
StringBuilder sb = new StringBuilder();
if (episode.getSeason() != null) {
@ -93,11 +109,60 @@ public class EpisodeFormat extends Format {
return sb.toString();
}
/**
 * Formats a group of episodes as "series - numbers - titles".
 *
 * Series names, {@link #formatSxE(Episode)} results and titles are each
 * collected without duplicates in encounter order and combined with " & ".
 * A parenthesized group at the very end of a title is stripped before the
 * title is collected.
 *
 * @param episodes the episodes to describe
 * @return the combined text for all given episodes
 */
public String formatMultiEpisode(Iterable<Episode> episodes) {
    Set<String> seriesNames = new LinkedHashSet<String>();
    Set<String> numbers = new LinkedHashSet<String>();
    Set<String> titles = new LinkedHashSet<String>();

    for (Episode episode : episodes) {
        seriesNames.add(episode.getSeriesName());
        numbers.add(formatSxE(episode));

        // drop a trailing "(...)" suffix from the title
        String plainTitle = episode.getTitle().replaceAll("[(]([^)]*)[)]$", "").trim();
        titles.add(plainTitle);
    }

    String seriesPart = join(seriesNames, " & ");
    String numberPart = join(numbers, " & ");
    String titlePart = join(titles, " & ");

    return String.format("%s - %s - %s", seriesPart, numberPart, titlePart);
}
/**
 * Formats the numbers of several episodes in compact SxE notation.
 *
 * Consecutive episodes of the same season are chained with '-'
 * (e.g. 1x01-02-03); whenever the season changes a new group is started,
 * separated from the previous one by a single space (e.g. 1x24 2x01).
 *
 * Episodes without a season no longer cause a NullPointerException: their
 * group is written without the "Nx" prefix, i.e. as the bare episode number.
 *
 * @param episodes the episodes to format, in output order
 * @return the compact SxE text, or an empty string if there are no episodes
 */
public String formatMultiSxE(Iterable<Episode> episodes) {
    StringBuilder sb = new StringBuilder();
    Integer previousSeason = null;
    boolean first = true;

    for (Episode it : episodes) {
        Integer season = it.getSeason();

        // null-safe comparison; the very first episode always opens a new group
        boolean sameSeason = !first && (season == null ? previousSeason == null : season.equals(previousSeason));

        if (sameSeason) {
            sb.append('-').append(String.format("%02d", it.getEpisode()));
        } else {
            if (sb.length() > 0) {
                sb.append(' ');
            }
            if (season != null) {
                sb.append(season).append('x');
            }
            sb.append(String.format("%02d", it.getEpisode()));
        }

        previousSeason = season;
        first = false;
    }

    return sb.toString();
}
/**
 * Formats the numbers of several episodes in compact S00E00 notation.
 *
 * Consecutive episodes of the same season are chained with '-'
 * (e.g. S01E01-E02-E03); whenever the season changes a new S00E00 group is
 * appended directly, without a separator.
 *
 * Episodes without a season no longer cause a NullPointerException: their
 * group is written without the "S00" prefix, i.e. starting with "E00".
 *
 * @param episodes the episodes to format, in output order
 * @return the compact S00E00 text, or an empty string if there are no episodes
 */
public String formatMultiS00E00(Iterable<Episode> episodes) {
    StringBuilder sb = new StringBuilder();
    Integer previousSeason = null;
    boolean first = true;

    for (Episode it : episodes) {
        Integer season = it.getSeason();

        // null-safe comparison; the very first episode always opens a new group
        boolean sameSeason = !first && (season == null ? previousSeason == null : season.equals(previousSeason));

        if (sameSeason) {
            sb.append('-');
        } else if (season != null) {
            sb.append(String.format("S%02d", season));
        }
        sb.append(String.format("E%02d", it.getEpisode()));

        previousSeason = season;
        first = false;
    }

    return sb.toString();
}
private final Pattern sxePattern = Pattern.compile("- (?:(\\d{1,2})x)?(Special )?(\\d{1,3}) -");
private final Pattern airdatePattern = Pattern.compile("\\[(\\d{4}-\\d{1,2}-\\d{1,2})\\]");
@Override
public Episode parseObject(String s, ParsePosition pos) {
StringBuilder source = new StringBuilder(s);
@ -137,7 +202,7 @@ public class EpisodeFormat extends Format {
return null;
}
@Override
public Episode parseObject(String source) throws ParseException {
return (Episode) super.parseObject(source);

View File

@ -10,12 +10,6 @@ import java.util.List;
public final class EpisodeUtilities {
/**
 * Strips parenthesized groups from the given name and trims the result,
 * e.g. Doctor Who (2005) -> Doctor Who.
 *
 * Note that the pattern is not anchored, so every "(...)" group is removed,
 * not only one at the end of the name.
 *
 * @param name the name to clean up, must not be null
 * @return the name without parenthesized groups, trimmed
 */
public static String removeTrailingBrackets(String name) {
    java.util.regex.Pattern brackets = java.util.regex.Pattern.compile("[(]([^)]*)[)]");
    String stripped = brackets.matcher(name).replaceAll("");
    return stripped.trim();
}
public static List<Episode> filterBySeason(Iterable<Episode> episodes, int season) {
List<Episode> results = new ArrayList<Episode>(25);

View File

@ -0,0 +1,46 @@
package net.sourceforge.filebot.web;
import java.util.Arrays;
import java.util.List;
/**
 * An episode object that stands for several episodes at once, e.g. for a
 * single file that contains more than one episode.
 *
 * The state inherited from {@link Episode} is initialized from the first of
 * the given episodes. Equality and hash code are based on the complete
 * sequence of episodes.
 */
public class MultiEpisode extends Episode {

    // private snapshot of the episodes; never shared, so equals/hashCode stay stable
    private final Episode[] episodes;


    /**
     * @param episodes the episodes to combine; at least one is required
     *        because the first one initializes the inherited episode state
     */
    public MultiEpisode(Episode... episodes) {
        super(episodes[0]);

        // defensive copy: later changes to the caller's array must not affect this object
        this.episodes = episodes.clone();
    }


    /**
     * @return a new list holding the combined episodes in their original
     *         order; modifying it does not affect this object
     */
    public List<Episode> getEpisodes() {
        // Arrays.asList alone would be a write-through view of the internal array
        return new java.util.ArrayList<Episode>(Arrays.asList(episodes));
    }


    /**
     * Two multi-episodes are equal if they hold equal episodes in the same
     * order; any other kind of object is never equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof MultiEpisode) {
            MultiEpisode other = (MultiEpisode) obj;
            return Arrays.equals(episodes, other.episodes);
        }

        return false;
    }


    @Override
    public int hashCode() {
        return Arrays.hashCode(episodes);
    }


    /**
     * @return the text produced by {@code EpisodeFormat.SeasonEpisode.formatMultiEpisode} for the combined episodes
     */
    @Override
    public String toString() {
        return EpisodeFormat.SeasonEpisode.formatMultiEpisode(getEpisodes());
    }

}

View File

@ -77,4 +77,26 @@ public class SeasonEpisodeMatcherTest {
assertEquals(asList(new SxE(1, 1), new SxE(UNDEFINED, 101)), matcher.match("Test.101"));
}
/**
 * Every supported multi-episode notation must yield 1x01 as its first and
 * 1x04 as its fourth match.
 */
@Test
public void multiEpisodePatterns() {
    String[] names = {
            "s01e01-02-03-04",
            "s01e01e02e03e04",
            "1x01-02-03-04",
            "1x01x02x03x04",
            "[s01]_[e01-02-03-04]",
            "1x01.1x02.1x03.1x04"
    };

    for (String name : names) {
        assertEquals(new SxE(1, 1), matcher.match(name).get(0));
        assertEquals(new SxE(1, 4), matcher.match(name).get(3));
    }
}
}