* lots of improvements to subtitle-automatching esp. when handling movies

This commit is contained in:
Reinhard Pointner 2012-07-16 10:09:21 +00:00
parent 8fa867ae49
commit 8bd737ae71
6 changed files with 103 additions and 47 deletions

View File

@ -59,9 +59,7 @@ import net.sourceforge.filebot.hash.VerificationFileReader;
import net.sourceforge.filebot.hash.VerificationFileWriter; import net.sourceforge.filebot.hash.VerificationFileWriter;
import net.sourceforge.filebot.media.MediaDetection; import net.sourceforge.filebot.media.MediaDetection;
import net.sourceforge.filebot.similarity.EpisodeMatcher; import net.sourceforge.filebot.similarity.EpisodeMatcher;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match; import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.NameSimilarityMetric; import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SeriesNameMatcher; import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityComparator; import net.sourceforge.filebot.similarity.SimilarityComparator;
@ -711,28 +709,21 @@ public class CmdlineOperations implements CmdlineInterface {
private Map<File, SubtitleDescriptor> lookupSubtitleByFileName(SubtitleProvider service, Collection<String> querySet, Language language, Collection<File> videoFiles, boolean strict) throws Exception { private Map<File, SubtitleDescriptor> lookupSubtitleByFileName(SubtitleProvider service, Collection<String> querySet, Language language, Collection<File> videoFiles, boolean strict) throws Exception {
Map<File, SubtitleDescriptor> subtitleByVideo = new HashMap<File, SubtitleDescriptor>();
// search for subtitles // search for subtitles
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, language.getName()); List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, language.getName());
// match subtitle files to video files // match subtitle files to video files
if (subtitles.size() > 0) { if (subtitles.size() > 0) {
// first match everything as best as possible, then filter possibly bad matches Map<File, SubtitleDescriptor> subtitleByVideo = matchSubtitles(videoFiles, subtitles, strict);
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(videoFiles, subtitles, false, EpisodeMetrics.defaultSequence(true)); for (Entry<File, SubtitleDescriptor> it : subtitleByVideo.entrySet()) {
SimilarityMetric sanity = EpisodeMetrics.verificationMetric(); CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getKey().getName(), it.getValue().getName()));
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.5f)) {
CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getValue().getName(), it.getCandidate().getName()));
subtitleByVideo.put(it.getValue(), it.getCandidate());
} }
}
}
return subtitleByVideo; return subtitleByVideo;
} }
return emptyMap();
}
private List<String> detectSeriesQuery(Collection<File> mediaFiles, Locale locale) throws Exception { private List<String> detectSeriesQuery(Collection<File> mediaFiles, Locale locale) throws Exception {
// detect series name by common word sequence // detect series name by common word sequence

View File

@ -165,13 +165,13 @@ import net.sourceforge.filebot.similarity.*
def parseEpisodeNumber(path, strict = true) { def parseEpisodeNumber(path, strict = true) {
def input = path instanceof File ? path.name : path.toString() def input = path instanceof File ? path.name : path.toString()
def sxe = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(input) def sxe = MediaDetection.parseEpisodeNumber(input, strict)
return sxe == null || sxe.isEmpty() ? null : sxe[0] return sxe == null || sxe.isEmpty() ? null : sxe[0]
} }
def parseDate(path) { def parseDate(path) {
def input = path instanceof File ? path.name : path.toString() def input = path instanceof File ? path.name : path.toString()
return new DateMetric().parse(input) return MediaDetection.parseDate(input)
} }
def detectSeriesName(files, locale = Locale.ENGLISH) { def detectSeriesName(files, locale = Locale.ENGLISH) {

View File

@ -41,13 +41,16 @@ import net.sourceforge.filebot.MediaTypes;
import net.sourceforge.filebot.WebServices; import net.sourceforge.filebot.WebServices;
import net.sourceforge.filebot.similarity.CommonSequenceMatcher; import net.sourceforge.filebot.similarity.CommonSequenceMatcher;
import net.sourceforge.filebot.similarity.DateMatcher; import net.sourceforge.filebot.similarity.DateMatcher;
import net.sourceforge.filebot.similarity.DateMetric;
import net.sourceforge.filebot.similarity.MetricAvg; import net.sourceforge.filebot.similarity.MetricAvg;
import net.sourceforge.filebot.similarity.NameSimilarityMetric; import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher; import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity; import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
import net.sourceforge.filebot.similarity.SeriesNameMatcher; import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityComparator; import net.sourceforge.filebot.similarity.SimilarityComparator;
import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.web.Date;
import net.sourceforge.filebot.web.Movie; import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.MovieIdentificationService; import net.sourceforge.filebot.web.MovieIdentificationService;
import net.sourceforge.filebot.web.SearchResult; import net.sourceforge.filebot.web.SearchResult;
@ -73,6 +76,21 @@ public class MediaDetection {
} }
/**
 * Tells whether the given name looks like an episode, i.e. whether either an
 * episode number or a date can be parsed from it.
 *
 * @param name   the name to inspect
 * @param strict forwarded unchanged to {@link #parseEpisodeNumber(String, boolean)}
 * @return {@code true} if {@code parseEpisodeNumber} or {@code parseDate}
 *         yields a non-null result, {@code false} otherwise
 */
public static boolean isEpisode(String name, boolean strict) {
	// season / episode pattern is tried first; date parsing is only attempted if that yields nothing
	if (parseEpisodeNumber(name, strict) != null) {
		return true;
	}
	return parseDate(name) != null;
}
/**
 * Runs a {@link SeasonEpisodeMatcher} configured with
 * {@link SeasonEpisodeMatcher#DEFAULT_SANITY} over the given string.
 *
 * @param string the text to match against
 * @param strict passed through to the {@code SeasonEpisodeMatcher} constructor
 * @return whatever {@code SeasonEpisodeMatcher.match} returns for the input
 *         (NOTE(review): callers in this class treat {@code null} as "no match" - confirm against the matcher)
 */
public static List<SxE> parseEpisodeNumber(String string, boolean strict) {
	SeasonEpisodeMatcher sxeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
	return sxeMatcher.match(string);
}
/**
 * Delegates date extraction to a fresh {@link DateMetric} instance.
 *
 * @param object the value handed to {@code DateMetric.parse}
 * @return the result of {@code DateMetric.parse} for the given object
 *         (NOTE(review): presumably {@code null} when no date is found - confirm against DateMetric)
 */
public static Date parseDate(Object object) {
	DateMetric dateParser = new DateMetric();
	return dateParser.parse(object);
}
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception { public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
// map series names by folder // map series names by folder
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>(); Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();

View File

@ -330,7 +330,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
name = normalizePunctuation(name); name = normalizePunctuation(name);
// normalize to lower case // normalize to lower case
name.toLowerCase(); name = name.toLowerCase();
transformCache.put(object, name); transformCache.put(object, name);
return name; return name;

View File

@ -3,7 +3,10 @@ package net.sourceforge.filebot.subtitle;
import static java.lang.Math.*; import static java.lang.Math.*;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static net.sourceforge.filebot.MediaTypes.*; import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.tuned.FileUtilities.*; import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File; import java.io.File;
@ -16,12 +19,19 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.MetricAvg;
import net.sourceforge.filebot.similarity.NameSimilarityMetric; import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.ui.Language; import net.sourceforge.filebot.ui.Language;
import net.sourceforge.filebot.vfs.ArchiveType; import net.sourceforge.filebot.vfs.ArchiveType;
@ -33,13 +43,35 @@ import net.sourceforge.filebot.web.SubtitleProvider;
public final class SubtitleUtilities { public final class SubtitleUtilities {
/**
 * Matches video files to subtitle descriptors and keeps only those pairs that
 * pass a verification check.
 *
 * All files and subtitles are first run through a {@link Matcher} using the
 * default episode metric sequence, with {@code EpisodeMetrics.SubstringFields}
 * swapped for {@code EpisodeMetrics.SubstringSequence}. Each resulting pair is
 * then kept only if {@code EpisodeMetrics.verificationMetric()} rates it at
 * or above the threshold.
 *
 * @param files     the video files to find subtitles for
 * @param subtitles the candidate subtitle descriptors
 * @param strict    if {@code true} a pair needs a similarity of at least 0.9,
 *                  otherwise at least 0.5
 * @return accepted pairs keyed by file, in the order the matcher produced them
 * @throws InterruptedException declared for the underlying {@code Matcher.match()} call
 */
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
	// minimum verification score a pair must reach to be accepted
	final float acceptThreshold = strict ? 0.9f : 0.5f;

	// tune the default metrics for generic media <-> subtitle matching;
	// asList is a write-through view, so the replacement lands in the array itself
	SimilarityMetric[] matchMetrics = EpisodeMetrics.defaultSequence(false);
	replaceAll(asList(matchMetrics), EpisodeMetrics.SubstringFields, EpisodeMetrics.SubstringSequence);

	// match everything as well as possible first ...
	Matcher<File, SubtitleDescriptor> pairing = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, matchMetrics);
	SimilarityMetric verifier = EpisodeMetrics.verificationMetric();

	// ... then drop the pairs that look like bad matches
	Map<File, SubtitleDescriptor> accepted = new LinkedHashMap<File, SubtitleDescriptor>();
	for (Match<File, SubtitleDescriptor> pair : pairing.match()) {
		File video = pair.getValue();
		SubtitleDescriptor subtitle = pair.getCandidate();

		if (verifier.getSimilarity(video, subtitle) < acceptThreshold) {
			continue;
		}
		accepted.put(video, subtitle);
	}

	return accepted;
}
public static List<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, String languageName) throws Exception { public static List<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, String languageName) throws Exception {
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(); List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>();
// search for and automatically select movie / show entry // search for and automatically select movie / show entry
Set<SearchResult> resultSet = new HashSet<SearchResult>(); Set<SearchResult> resultSet = new HashSet<SearchResult>();
for (String query : querySet) { for (String query : querySet) {
resultSet.addAll(findProbableMatches(query, service.search(query), 0.9f)); resultSet.addAll(findProbableSearchResults(query, service.search(query)));
} }
// fetch subtitles for all search results // fetch subtitles for all search results
@ -51,16 +83,16 @@ public final class SubtitleUtilities {
} }
protected static Collection<SearchResult> findProbableMatches(String query, Iterable<? extends SearchResult> searchResults, float threshold) { protected static Collection<SearchResult> findProbableSearchResults(String query, Iterable<? extends SearchResult> searchResults) {
// auto-select most probable search result // auto-select most probable search result
Set<SearchResult> probableMatches = new LinkedHashSet<SearchResult>(); Set<SearchResult> probableMatches = new LinkedHashSet<SearchResult>();
// use name similarity metric // use name similarity metric
SimilarityMetric metric = new NameSimilarityMetric(); SimilarityMetric metric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric());
// find probable matches using name similarity > threshold // find probable matches using name similarity > threshold
for (SearchResult result : searchResults) { for (SearchResult result : searchResults) {
if (metric.getSimilarity(query, result.getName()) > threshold) { if (metric.getSimilarity(query, removeTrailingBrackets(result.getName())) > 0.8f) {
probableMatches.add(result); probableMatches.add(result);
} }
} }

View File

@ -61,10 +61,10 @@ import net.miginfocom.swing.MigLayout;
import net.sourceforge.filebot.Analytics; import net.sourceforge.filebot.Analytics;
import net.sourceforge.filebot.ResourceManager; import net.sourceforge.filebot.ResourceManager;
import net.sourceforge.filebot.similarity.EpisodeMetrics; import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match; import net.sourceforge.filebot.similarity.MetricCascade;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.vfs.MemoryFile; import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.SubtitleDescriptor; import net.sourceforge.filebot.web.SubtitleDescriptor;
import net.sourceforge.filebot.web.SubtitleProvider; import net.sourceforge.filebot.web.SubtitleProvider;
import net.sourceforge.filebot.web.VideoHashSubtitleService; import net.sourceforge.filebot.web.VideoHashSubtitleService;
@ -437,7 +437,7 @@ class SubtitleAutoMatchDialog extends JDialog {
} }
if (f < 0.9f) { if (f < 0.9f) {
setOpaque(true); setOpaque(true);
setBackground(derive(Color.RED, 1 - (f * 0.75f))); setBackground(derive(Color.RED, (1 - f) * 0.5f));
} }
} }
@ -968,20 +968,32 @@ class SubtitleAutoMatchDialog extends JDialog {
@Override @Override
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception { protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>(); // ignore clutter files from processing
for (File file : files) { files = filter(files, NON_CLUTTER_FILES);
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
}
// auto-detect query and search for subtitles // auto-detect query and search for subtitles
Collection<String> querySet = detectSeriesNames(files, Locale.ENGLISH); Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
// auto-detect series names
querySet.addAll(detectSeriesNames(files, Locale.ROOT));
// auto-detect movie names
for (File f : files) {
if (!isEpisode(f.getName(), false)) {
for (Movie movie : detectMovie(f, null, null, Locale.ROOT, false)) {
querySet.add(movie.getName());
}
}
}
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName); List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
// if auto-detection fails, ask user for input // if auto-detection fails, ask user for input
if (subtitles.isEmpty()) { if (subtitles.isEmpty()) {
// dialog may have been cancelled by now // dialog may have been cancelled by now
if (Thread.interrupted()) if (Thread.interrupted()) {
throw new CancellationException(); throw new CancellationException();
}
querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent); querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent);
subtitles = findSubtitles(service, querySet, languageName); subtitles = findSubtitles(service, querySet, languageName);
@ -992,18 +1004,20 @@ class SubtitleAutoMatchDialog extends JDialog {
} }
} }
// first match everything as best as possible, then filter possibly bad matches // files by possible subtitles matches
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, EpisodeMetrics.defaultSequence(true)); Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
SimilarityMetric sanity = EpisodeMetrics.verificationMetric(); for (File file : files) {
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= 1) {
subtitlesByFile.get(it.getValue()).add(it.getCandidate());
} }
// first match everything as best as possible, then filter possibly bad matches
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, false).entrySet()) {
subtitlesByFile.get(it.getKey()).add(it.getValue());
} }
// add other possible matches to the options // add other possible matches to the options
float minMatchSimilarity = 0.6f; SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
float minMatchSimilarity = 0.5f;
for (File file : files) { for (File file : files) {
// add matching subtitles // add matching subtitles
@ -1020,7 +1034,8 @@ class SubtitleAutoMatchDialog extends JDialog {
@Override @Override
public float getMatchProbabilty(File videoFile, SubtitleDescriptor descriptor) { public float getMatchProbabilty(File videoFile, SubtitleDescriptor descriptor) {
return EpisodeMetrics.verificationMetric().getSimilarity(videoFile, descriptor) * 0.9f; SimilarityMetric metric = new MetricCascade(EpisodeMetrics.SeasonEpisode, EpisodeMetrics.AirDate, EpisodeMetrics.Name);
return 0.9f * metric.getSimilarity(videoFile, descriptor);
} }
} }