* lots of improvements to subtitle-automatching esp. when handling movies
This commit is contained in:
parent
8fa867ae49
commit
8bd737ae71
|
@ -59,9 +59,7 @@ import net.sourceforge.filebot.hash.VerificationFileReader;
|
||||||
import net.sourceforge.filebot.hash.VerificationFileWriter;
|
import net.sourceforge.filebot.hash.VerificationFileWriter;
|
||||||
import net.sourceforge.filebot.media.MediaDetection;
|
import net.sourceforge.filebot.media.MediaDetection;
|
||||||
import net.sourceforge.filebot.similarity.EpisodeMatcher;
|
import net.sourceforge.filebot.similarity.EpisodeMatcher;
|
||||||
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
|
||||||
import net.sourceforge.filebot.similarity.Match;
|
import net.sourceforge.filebot.similarity.Match;
|
||||||
import net.sourceforge.filebot.similarity.Matcher;
|
|
||||||
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
||||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||||
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
||||||
|
@ -711,28 +709,21 @@ public class CmdlineOperations implements CmdlineInterface {
|
||||||
|
|
||||||
|
|
||||||
private Map<File, SubtitleDescriptor> lookupSubtitleByFileName(SubtitleProvider service, Collection<String> querySet, Language language, Collection<File> videoFiles, boolean strict) throws Exception {
|
private Map<File, SubtitleDescriptor> lookupSubtitleByFileName(SubtitleProvider service, Collection<String> querySet, Language language, Collection<File> videoFiles, boolean strict) throws Exception {
|
||||||
Map<File, SubtitleDescriptor> subtitleByVideo = new HashMap<File, SubtitleDescriptor>();
|
|
||||||
|
|
||||||
// search for subtitles
|
// search for subtitles
|
||||||
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, language.getName());
|
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, language.getName());
|
||||||
|
|
||||||
// match subtitle files to video files
|
// match subtitle files to video files
|
||||||
if (subtitles.size() > 0) {
|
if (subtitles.size() > 0) {
|
||||||
// first match everything as best as possible, then filter possibly bad matches
|
Map<File, SubtitleDescriptor> subtitleByVideo = matchSubtitles(videoFiles, subtitles, strict);
|
||||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(videoFiles, subtitles, false, EpisodeMetrics.defaultSequence(true));
|
for (Entry<File, SubtitleDescriptor> it : subtitleByVideo.entrySet()) {
|
||||||
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getKey().getName(), it.getValue().getName()));
|
||||||
|
|
||||||
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
|
||||||
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.5f)) {
|
|
||||||
CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getValue().getName(), it.getCandidate().getName()));
|
|
||||||
subtitleByVideo.put(it.getValue(), it.getCandidate());
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return subtitleByVideo;
|
return subtitleByVideo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return emptyMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<String> detectSeriesQuery(Collection<File> mediaFiles, Locale locale) throws Exception {
|
private List<String> detectSeriesQuery(Collection<File> mediaFiles, Locale locale) throws Exception {
|
||||||
// detect series name by common word sequence
|
// detect series name by common word sequence
|
||||||
|
|
|
@ -165,13 +165,13 @@ import net.sourceforge.filebot.similarity.*
|
||||||
|
|
||||||
def parseEpisodeNumber(path, strict = true) {
|
def parseEpisodeNumber(path, strict = true) {
|
||||||
def input = path instanceof File ? path.name : path.toString()
|
def input = path instanceof File ? path.name : path.toString()
|
||||||
def sxe = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(input)
|
def sxe = MediaDetection.parseEpisodeNumber(input, strict)
|
||||||
return sxe == null || sxe.isEmpty() ? null : sxe[0]
|
return sxe == null || sxe.isEmpty() ? null : sxe[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
def parseDate(path) {
|
def parseDate(path) {
|
||||||
def input = path instanceof File ? path.name : path.toString()
|
def input = path instanceof File ? path.name : path.toString()
|
||||||
return new DateMetric().parse(input)
|
return MediaDetection.parseDate(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
def detectSeriesName(files, locale = Locale.ENGLISH) {
|
def detectSeriesName(files, locale = Locale.ENGLISH) {
|
||||||
|
|
|
@ -41,13 +41,16 @@ import net.sourceforge.filebot.MediaTypes;
|
||||||
import net.sourceforge.filebot.WebServices;
|
import net.sourceforge.filebot.WebServices;
|
||||||
import net.sourceforge.filebot.similarity.CommonSequenceMatcher;
|
import net.sourceforge.filebot.similarity.CommonSequenceMatcher;
|
||||||
import net.sourceforge.filebot.similarity.DateMatcher;
|
import net.sourceforge.filebot.similarity.DateMatcher;
|
||||||
|
import net.sourceforge.filebot.similarity.DateMetric;
|
||||||
import net.sourceforge.filebot.similarity.MetricAvg;
|
import net.sourceforge.filebot.similarity.MetricAvg;
|
||||||
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
||||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher;
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher;
|
||||||
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||||
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
|
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
|
||||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||||
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
||||||
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
||||||
|
import net.sourceforge.filebot.web.Date;
|
||||||
import net.sourceforge.filebot.web.Movie;
|
import net.sourceforge.filebot.web.Movie;
|
||||||
import net.sourceforge.filebot.web.MovieIdentificationService;
|
import net.sourceforge.filebot.web.MovieIdentificationService;
|
||||||
import net.sourceforge.filebot.web.SearchResult;
|
import net.sourceforge.filebot.web.SearchResult;
|
||||||
|
@ -73,6 +76,21 @@ public class MediaDetection {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Tells whether the given name looks like an episode, i.e. whether an episode number or a date can be parsed from it.
 * 
 * @param name text to inspect (e.g. a file name)
 * @param strict forwarded to {@link #parseEpisodeNumber(String, boolean)} only; date parsing does not take it
 * @return {@code true} if {@code parseEpisodeNumber(name, strict)} or {@code parseDate(name)} yields a non-null result
 */
public static boolean isEpisode(String name, boolean strict) {
|
||||||
|
// NOTE(review): only null is treated as "no match" here; an empty list from parseEpisodeNumber would count as an episode -- confirm the matcher never returns an empty list
return parseEpisodeNumber(name, strict) != null || parseDate(name) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Parses season / episode numbers from the given string using a freshly created
 * {@link SeasonEpisodeMatcher} configured with {@code SeasonEpisodeMatcher.DEFAULT_SANITY} and the given strict flag.
 * 
 * @param string text to scan
 * @param strict passed through to the {@link SeasonEpisodeMatcher} constructor
 * @return whatever {@code match(string)} returns; presumably {@code null} when nothing matches (callers in this class compare against null) -- TODO confirm
 */
public static List<SxE> parseEpisodeNumber(String string, boolean strict) {
|
||||||
|
return new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Parses a date from the given object by delegating to a new {@link DateMetric} instance.
 * 
 * @param object value handed unchanged to {@code DateMetric.parse}
 * @return the result of {@code DateMetric.parse(object)}; presumably {@code null} when no date is found (isEpisode checks for null) -- TODO confirm
 */
public static Date parseDate(Object object) {
|
||||||
|
return new DateMetric().parse(object);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
|
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
|
||||||
// map series names by folder
|
// map series names by folder
|
||||||
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
|
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
|
||||||
|
|
|
@ -330,7 +330,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
name = normalizePunctuation(name);
|
name = normalizePunctuation(name);
|
||||||
|
|
||||||
// normalize to lower case
|
// normalize to lower case
|
||||||
name.toLowerCase();
|
name = name.toLowerCase();
|
||||||
|
|
||||||
transformCache.put(object, name);
|
transformCache.put(object, name);
|
||||||
return name;
|
return name;
|
||||||
|
|
|
@ -3,7 +3,10 @@ package net.sourceforge.filebot.subtitle;
|
||||||
|
|
||||||
|
|
||||||
import static java.lang.Math.*;
|
import static java.lang.Math.*;
|
||||||
|
import static java.util.Arrays.*;
|
||||||
|
import static java.util.Collections.*;
|
||||||
import static net.sourceforge.filebot.MediaTypes.*;
|
import static net.sourceforge.filebot.MediaTypes.*;
|
||||||
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
import static net.sourceforge.tuned.FileUtilities.*;
|
import static net.sourceforge.tuned.FileUtilities.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -16,12 +19,19 @@ import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
||||||
|
import net.sourceforge.filebot.similarity.Match;
|
||||||
|
import net.sourceforge.filebot.similarity.Matcher;
|
||||||
|
import net.sourceforge.filebot.similarity.MetricAvg;
|
||||||
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
||||||
|
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
|
||||||
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
||||||
import net.sourceforge.filebot.ui.Language;
|
import net.sourceforge.filebot.ui.Language;
|
||||||
import net.sourceforge.filebot.vfs.ArchiveType;
|
import net.sourceforge.filebot.vfs.ArchiveType;
|
||||||
|
@ -33,13 +43,35 @@ import net.sourceforge.filebot.web.SubtitleProvider;
|
||||||
|
|
||||||
public final class SubtitleUtilities {
|
public final class SubtitleUtilities {
|
||||||
|
|
||||||
|
/**
 * Matches video files to subtitle descriptors and keeps only the matches that pass a sanity check.
 * 
 * All files are first matched against all subtitles using the default episode metric sequence (with
 * {@code EpisodeMetrics.SubstringFields} swapped for {@code EpisodeMetrics.SubstringSequence}); each resulting match
 * is then kept only if {@code EpisodeMetrics.verificationMetric()} rates it at least 0.9 (strict) or 0.5 (non-strict).
 * 
 * @param files video files to find subtitles for
 * @param subtitles candidate subtitle descriptors
 * @param strict if {@code true} the sanity threshold is 0.9, otherwise 0.5
 * @return accepted matches keyed by file, in the order the matcher produced them (backed by a {@link LinkedHashMap})
 * @throws InterruptedException declared by this method; presumably propagated from {@code matcher.match()} -- TODO confirm
 */
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
|
||||||
|
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
||||||
|
|
||||||
|
SimilarityMetric[] metrics = EpisodeMetrics.defaultSequence(false);
|
||||||
|
|
||||||
|
// optimize for generic media <-> subtitle matching
|
||||||
|
// presumably java.util.Arrays.asList / java.util.Collections.replaceAll (static imports): the list is a view of the array, so the metric is replaced in 'metrics' itself
replaceAll(asList(metrics), EpisodeMetrics.SubstringFields, EpisodeMetrics.SubstringSequence);
|
||||||
|
|
||||||
|
// first match everything as best as possible, then filter possibly bad matches
|
||||||
|
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
||||||
|
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
||||||
|
|
||||||
|
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
||||||
|
// getValue() is the video file, getCandidate() the subtitle; drop the pair if the verification score is below the threshold
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.5f)) {
|
||||||
|
subtitleByVideo.put(it.getValue(), it.getCandidate());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return subtitleByVideo;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static List<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, String languageName) throws Exception {
|
public static List<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, String languageName) throws Exception {
|
||||||
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>();
|
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>();
|
||||||
|
|
||||||
// search for and automatically select movie / show entry
|
// search for and automatically select movie / show entry
|
||||||
Set<SearchResult> resultSet = new HashSet<SearchResult>();
|
Set<SearchResult> resultSet = new HashSet<SearchResult>();
|
||||||
for (String query : querySet) {
|
for (String query : querySet) {
|
||||||
resultSet.addAll(findProbableMatches(query, service.search(query), 0.9f));
|
resultSet.addAll(findProbableSearchResults(query, service.search(query)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// fetch subtitles for all search results
|
// fetch subtitles for all search results
|
||||||
|
@ -51,16 +83,16 @@ public final class SubtitleUtilities {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected static Collection<SearchResult> findProbableMatches(String query, Iterable<? extends SearchResult> searchResults, float threshold) {
|
protected static Collection<SearchResult> findProbableSearchResults(String query, Iterable<? extends SearchResult> searchResults) {
|
||||||
// auto-select most probable search result
|
// auto-select most probable search result
|
||||||
Set<SearchResult> probableMatches = new LinkedHashSet<SearchResult>();
|
Set<SearchResult> probableMatches = new LinkedHashSet<SearchResult>();
|
||||||
|
|
||||||
// use name similarity metric
|
// use name similarity metric
|
||||||
SimilarityMetric metric = new NameSimilarityMetric();
|
SimilarityMetric metric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric());
|
||||||
|
|
||||||
// find probable matches using name similarity > threshold
|
// find probable matches using name similarity > threshold
|
||||||
for (SearchResult result : searchResults) {
|
for (SearchResult result : searchResults) {
|
||||||
if (metric.getSimilarity(query, result.getName()) > threshold) {
|
if (metric.getSimilarity(query, removeTrailingBrackets(result.getName())) > 0.8f) {
|
||||||
probableMatches.add(result);
|
probableMatches.add(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,10 +61,10 @@ import net.miginfocom.swing.MigLayout;
|
||||||
import net.sourceforge.filebot.Analytics;
|
import net.sourceforge.filebot.Analytics;
|
||||||
import net.sourceforge.filebot.ResourceManager;
|
import net.sourceforge.filebot.ResourceManager;
|
||||||
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
||||||
import net.sourceforge.filebot.similarity.Match;
|
import net.sourceforge.filebot.similarity.MetricCascade;
|
||||||
import net.sourceforge.filebot.similarity.Matcher;
|
|
||||||
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
||||||
import net.sourceforge.filebot.vfs.MemoryFile;
|
import net.sourceforge.filebot.vfs.MemoryFile;
|
||||||
|
import net.sourceforge.filebot.web.Movie;
|
||||||
import net.sourceforge.filebot.web.SubtitleDescriptor;
|
import net.sourceforge.filebot.web.SubtitleDescriptor;
|
||||||
import net.sourceforge.filebot.web.SubtitleProvider;
|
import net.sourceforge.filebot.web.SubtitleProvider;
|
||||||
import net.sourceforge.filebot.web.VideoHashSubtitleService;
|
import net.sourceforge.filebot.web.VideoHashSubtitleService;
|
||||||
|
@ -437,7 +437,7 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
}
|
}
|
||||||
if (f < 0.9f) {
|
if (f < 0.9f) {
|
||||||
setOpaque(true);
|
setOpaque(true);
|
||||||
setBackground(derive(Color.RED, 1 - (f * 0.75f)));
|
setBackground(derive(Color.RED, (1 - f) * 0.5f));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -968,20 +968,32 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
|
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
|
||||||
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
|
// ignore clutter files from processing
|
||||||
for (File file : files) {
|
files = filter(files, NON_CLUTTER_FILES);
|
||||||
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
|
|
||||||
}
|
|
||||||
|
|
||||||
// auto-detect query and search for subtitles
|
// auto-detect query and search for subtitles
|
||||||
Collection<String> querySet = detectSeriesNames(files, Locale.ENGLISH);
|
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||||
|
|
||||||
|
// auto-detect series names
|
||||||
|
querySet.addAll(detectSeriesNames(files, Locale.ROOT));
|
||||||
|
|
||||||
|
// auto-detect movie names
|
||||||
|
for (File f : files) {
|
||||||
|
if (!isEpisode(f.getName(), false)) {
|
||||||
|
for (Movie movie : detectMovie(f, null, null, Locale.ROOT, false)) {
|
||||||
|
querySet.add(movie.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
|
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
|
||||||
|
|
||||||
// if auto-detection fails, ask user for input
|
// if auto-detection fails, ask user for input
|
||||||
if (subtitles.isEmpty()) {
|
if (subtitles.isEmpty()) {
|
||||||
// dialog may have been cancelled by now
|
// dialog may have been cancelled by now
|
||||||
if (Thread.interrupted())
|
if (Thread.interrupted()) {
|
||||||
throw new CancellationException();
|
throw new CancellationException();
|
||||||
|
}
|
||||||
|
|
||||||
querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent);
|
querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent);
|
||||||
subtitles = findSubtitles(service, querySet, languageName);
|
subtitles = findSubtitles(service, querySet, languageName);
|
||||||
|
@ -992,18 +1004,20 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// first match everything as best as possible, then filter possibly bad matches
|
// files by possible subtitles matches
|
||||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, EpisodeMetrics.defaultSequence(true));
|
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
|
||||||
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
for (File file : files) {
|
||||||
|
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
|
||||||
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
|
||||||
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= 1) {
|
|
||||||
subtitlesByFile.get(it.getValue()).add(it.getCandidate());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// first match everything as best as possible, then filter possibly bad matches
|
||||||
|
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, false).entrySet()) {
|
||||||
|
subtitlesByFile.get(it.getKey()).add(it.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
// add other possible matches to the options
|
// add other possible matches to the options
|
||||||
float minMatchSimilarity = 0.6f;
|
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
||||||
|
float minMatchSimilarity = 0.5f;
|
||||||
|
|
||||||
for (File file : files) {
|
for (File file : files) {
|
||||||
// add matching subtitles
|
// add matching subtitles
|
||||||
|
@ -1020,7 +1034,8 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getMatchProbabilty(File videoFile, SubtitleDescriptor descriptor) {
|
public float getMatchProbabilty(File videoFile, SubtitleDescriptor descriptor) {
|
||||||
return EpisodeMetrics.verificationMetric().getSimilarity(videoFile, descriptor) * 0.9f;
|
SimilarityMetric metric = new MetricCascade(EpisodeMetrics.SeasonEpisode, EpisodeMetrics.AirDate, EpisodeMetrics.Name);
|
||||||
|
return 0.9f * metric.getSimilarity(videoFile, descriptor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue