* Lots of improvements to subtitle auto-matching, especially when handling movies

This commit is contained in:
Reinhard Pointner 2012-07-16 10:09:21 +00:00
parent 8fa867ae49
commit 8bd737ae71
6 changed files with 103 additions and 47 deletions

View File

@ -59,9 +59,7 @@ import net.sourceforge.filebot.hash.VerificationFileReader;
import net.sourceforge.filebot.hash.VerificationFileWriter;
import net.sourceforge.filebot.media.MediaDetection;
import net.sourceforge.filebot.similarity.EpisodeMatcher;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityComparator;
@ -711,26 +709,19 @@ public class CmdlineOperations implements CmdlineInterface {
private Map<File, SubtitleDescriptor> lookupSubtitleByFileName(SubtitleProvider service, Collection<String> querySet, Language language, Collection<File> videoFiles, boolean strict) throws Exception {
Map<File, SubtitleDescriptor> subtitleByVideo = new HashMap<File, SubtitleDescriptor>();
// search for subtitles
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, language.getName());
// match subtitle files to video files
if (subtitles.size() > 0) {
// first match everything as best as possible, then filter possibly bad matches
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(videoFiles, subtitles, false, EpisodeMetrics.defaultSequence(true));
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.5f)) {
CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getValue().getName(), it.getCandidate().getName()));
subtitleByVideo.put(it.getValue(), it.getCandidate());
}
Map<File, SubtitleDescriptor> subtitleByVideo = matchSubtitles(videoFiles, subtitles, strict);
for (Entry<File, SubtitleDescriptor> it : subtitleByVideo.entrySet()) {
CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getKey().getName(), it.getValue().getName()));
}
return subtitleByVideo;
}
return subtitleByVideo;
return emptyMap();
}

View File

@ -165,13 +165,13 @@ import net.sourceforge.filebot.similarity.*
def parseEpisodeNumber(path, strict = true) {
def input = path instanceof File ? path.name : path.toString()
def sxe = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(input)
def sxe = MediaDetection.parseEpisodeNumber(input, strict)
return sxe == null || sxe.isEmpty() ? null : sxe[0]
}
def parseDate(path) {
def input = path instanceof File ? path.name : path.toString()
return new DateMetric().parse(input)
return MediaDetection.parseDate(input)
}
def detectSeriesName(files, locale = Locale.ENGLISH) {

View File

@ -41,13 +41,16 @@ import net.sourceforge.filebot.MediaTypes;
import net.sourceforge.filebot.WebServices;
import net.sourceforge.filebot.similarity.CommonSequenceMatcher;
import net.sourceforge.filebot.similarity.DateMatcher;
import net.sourceforge.filebot.similarity.DateMetric;
import net.sourceforge.filebot.similarity.MetricAvg;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityComparator;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.web.Date;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.MovieIdentificationService;
import net.sourceforge.filebot.web.SearchResult;
@ -73,6 +76,21 @@ public class MediaDetection {
}
/**
 * Check whether the given name looks like an episode, i.e. whether either
 * season / episode numbers or a date can be parsed from it.
 *
 * @param name
 *            the name to inspect
 * @param strict
 *            passed through to {@link #parseEpisodeNumber(String, boolean)}
 * @return {@code true} if at least one season / episode pattern was found or a date could be parsed
 */
public static boolean isEpisode(String name, boolean strict) {
	// a null result and an empty result both mean "no episode number found"
	List<SxE> episodeNumbers = parseEpisodeNumber(name, strict);
	if (episodeNumbers != null && !episodeNumbers.isEmpty()) {
		return true;
	}

	// no episode number, fall back to date detection
	return parseDate(name) != null;
}
/**
 * Run a {@link SeasonEpisodeMatcher} configured with {@code SeasonEpisodeMatcher.DEFAULT_SANITY}
 * and the given strictness over the given string.
 *
 * @param string
 *            the text to search for season / episode patterns
 * @param strict
 *            strictness flag handed to the matcher
 * @return the matcher's result, returned unchanged (callers in this file check it for {@code null})
 */
public static List<SxE> parseEpisodeNumber(String string, boolean strict) {
	SeasonEpisodeMatcher episodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
	return episodeMatcher.match(string);
}
/**
 * Parse a date from the given object by delegating to a fresh {@link DateMetric}.
 *
 * @param object
 *            the value to parse a date from; passed to {@code DateMetric.parse} as is
 * @return the parser's result, returned unchanged (callers in this file check it for {@code null})
 */
public static Date parseDate(Object object) {
	DateMetric dateParser = new DateMetric();
	return dateParser.parse(object);
}
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
// map series names by folder
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();

View File

@ -330,7 +330,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
name = normalizePunctuation(name);
// normalize to lower case
name.toLowerCase();
name = name.toLowerCase();
transformCache.put(object, name);
return name;

View File

@ -3,7 +3,10 @@ package net.sourceforge.filebot.subtitle;
import static java.lang.Math.*;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
@ -16,12 +19,19 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.MetricAvg;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.ui.Language;
import net.sourceforge.filebot.vfs.ArchiveType;
@ -33,13 +43,35 @@ import net.sourceforge.filebot.web.SubtitleProvider;
public final class SubtitleUtilities {
/**
 * Match video files against subtitle descriptors and keep only those matches
 * that pass a verification check.
 *
 * @param files
 *            the files to find subtitles for
 * @param subtitles
 *            the candidate subtitle descriptors
 * @param strict
 *            if {@code true} a match needs a verification similarity of at least 0.9, otherwise 0.5 suffices
 * @return accepted matches keyed by file, in the order in which the matcher returned them
 * @throws InterruptedException
 *             declared here; presumably raised by the underlying matcher (not visible in this file)
 */
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
	Map<File, SubtitleDescriptor> acceptedMatches = new LinkedHashMap<File, SubtitleDescriptor>();

	// minimum verification score a match needs in order to be kept
	final float acceptThreshold = strict ? 0.9f : 0.5f;

	// start from the default metrics, but use SubstringSequence in place of SubstringFields
	// for generic media <-> subtitle matching (asList is a view, so the array itself is updated)
	SimilarityMetric[] matchMetrics = EpisodeMetrics.defaultSequence(false);
	replaceAll(asList(matchMetrics), EpisodeMetrics.SubstringFields, EpisodeMetrics.SubstringSequence);

	// match as well as possible first, then verify each match individually
	Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, matchMetrics);
	SimilarityMetric verifier = EpisodeMetrics.verificationMetric();

	for (Match<File, SubtitleDescriptor> match : matcher.match()) {
		File video = match.getValue();
		SubtitleDescriptor subtitle = match.getCandidate();

		if (verifier.getSimilarity(video, subtitle) >= acceptThreshold) {
			acceptedMatches.put(video, subtitle);
		}
	}

	return acceptedMatches;
}
public static List<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, String languageName) throws Exception {
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>();
// search for and automatically select movie / show entry
Set<SearchResult> resultSet = new HashSet<SearchResult>();
for (String query : querySet) {
resultSet.addAll(findProbableMatches(query, service.search(query), 0.9f));
resultSet.addAll(findProbableSearchResults(query, service.search(query)));
}
// fetch subtitles for all search results
@ -50,17 +82,17 @@ public final class SubtitleUtilities {
return subtitles;
}
protected static Collection<SearchResult> findProbableMatches(String query, Iterable<? extends SearchResult> searchResults, float threshold) {
protected static Collection<SearchResult> findProbableSearchResults(String query, Iterable<? extends SearchResult> searchResults) {
// auto-select most probable search result
Set<SearchResult> probableMatches = new LinkedHashSet<SearchResult>();
// use name similarity metric
SimilarityMetric metric = new NameSimilarityMetric();
SimilarityMetric metric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric());
// find probable matches using name similarity > threshold
for (SearchResult result : searchResults) {
if (metric.getSimilarity(query, result.getName()) > threshold) {
if (metric.getSimilarity(query, removeTrailingBrackets(result.getName())) > 0.8f) {
probableMatches.add(result);
}
}
@ -68,7 +100,7 @@ public final class SubtitleUtilities {
return probableMatches;
}
/**
* Detect charset and parse subtitle file even if extension is invalid
*/
@ -108,7 +140,7 @@ public final class SubtitleUtilities {
throw new IOException("Cannot read subtitle format");
}
public static ByteBuffer exportSubtitles(MemoryFile data, SubtitleFormat outputFormat, long outputTimingOffset, Charset outputEncoding) throws IOException {
if (outputFormat != null && outputFormat != SubtitleFormat.SubRip) {
throw new IllegalArgumentException("Format not supported");
@ -134,7 +166,7 @@ public final class SubtitleUtilities {
return outputEncoding.encode(getText(data.getData()));
}
public static SubtitleFormat getSubtitleFormat(File file) {
for (SubtitleFormat it : SubtitleFormat.values()) {
if (it.getFilter().accept(file))
@ -144,7 +176,7 @@ public final class SubtitleUtilities {
return null;
}
public static SubtitleFormat getSubtitleFormatByName(String name) {
for (SubtitleFormat it : SubtitleFormat.values()) {
// check by name
@ -159,7 +191,7 @@ public final class SubtitleUtilities {
return null;
}
public static String formatSubtitle(String name, String languageName, String type) {
StringBuilder sb = new StringBuilder(name);
@ -181,7 +213,7 @@ public final class SubtitleUtilities {
return sb.toString();
}
public static MemoryFile fetchSubtitle(SubtitleDescriptor descriptor) throws Exception {
ByteBuffer data = descriptor.fetch();
@ -203,7 +235,7 @@ public final class SubtitleUtilities {
return new MemoryFile(descriptor.getPath(), data);
}
/**
* Dummy constructor to prevent instantiation.
*/

View File

@ -61,10 +61,10 @@ import net.miginfocom.swing.MigLayout;
import net.sourceforge.filebot.Analytics;
import net.sourceforge.filebot.ResourceManager;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.MetricCascade;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.SubtitleDescriptor;
import net.sourceforge.filebot.web.SubtitleProvider;
import net.sourceforge.filebot.web.VideoHashSubtitleService;
@ -437,7 +437,7 @@ class SubtitleAutoMatchDialog extends JDialog {
}
if (f < 0.9f) {
setOpaque(true);
setBackground(derive(Color.RED, 1 - (f * 0.75f)));
setBackground(derive(Color.RED, (1 - f) * 0.5f));
}
}
@ -968,20 +968,32 @@ class SubtitleAutoMatchDialog extends JDialog {
@Override
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
for (File file : files) {
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
}
// ignore clutter files from processing
files = filter(files, NON_CLUTTER_FILES);
// auto-detect query and search for subtitles
Collection<String> querySet = detectSeriesNames(files, Locale.ENGLISH);
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
// auto-detect series names
querySet.addAll(detectSeriesNames(files, Locale.ROOT));
// auto-detect movie names
for (File f : files) {
if (!isEpisode(f.getName(), false)) {
for (Movie movie : detectMovie(f, null, null, Locale.ROOT, false)) {
querySet.add(movie.getName());
}
}
}
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
// if auto-detection fails, ask user for input
if (subtitles.isEmpty()) {
// dialog may have been cancelled by now
if (Thread.interrupted())
if (Thread.interrupted()) {
throw new CancellationException();
}
querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent);
subtitles = findSubtitles(service, querySet, languageName);
@ -992,18 +1004,20 @@ class SubtitleAutoMatchDialog extends JDialog {
}
}
// first match everything as best as possible, then filter possibly bad matches
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, EpisodeMetrics.defaultSequence(true));
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
// files by possible subtitles matches
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
for (File file : files) {
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
}
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= 1) {
subtitlesByFile.get(it.getValue()).add(it.getCandidate());
}
// first match everything as best as possible, then filter possibly bad matches
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, false).entrySet()) {
subtitlesByFile.get(it.getKey()).add(it.getValue());
}
// add other possible matches to the options
float minMatchSimilarity = 0.6f;
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
float minMatchSimilarity = 0.5f;
for (File file : files) {
// add matching subtitles
@ -1020,7 +1034,8 @@ class SubtitleAutoMatchDialog extends JDialog {
@Override
public float getMatchProbabilty(File videoFile, SubtitleDescriptor descriptor) {
return EpisodeMetrics.verificationMetric().getSimilarity(videoFile, descriptor) * 0.9f;
SimilarityMetric metric = new MetricCascade(EpisodeMetrics.SeasonEpisode, EpisodeMetrics.AirDate, EpisodeMetrics.Name);
return 0.9f * metric.getSimilarity(videoFile, descriptor);
}
}