* lots of improvements to subtitle-automatching esp. when handling movies
This commit is contained in:
parent
8fa867ae49
commit
8bd737ae71
|
@ -59,9 +59,7 @@ import net.sourceforge.filebot.hash.VerificationFileReader;
|
|||
import net.sourceforge.filebot.hash.VerificationFileWriter;
|
||||
import net.sourceforge.filebot.media.MediaDetection;
|
||||
import net.sourceforge.filebot.similarity.EpisodeMatcher;
|
||||
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
||||
import net.sourceforge.filebot.similarity.Match;
|
||||
import net.sourceforge.filebot.similarity.Matcher;
|
||||
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
||||
|
@ -711,26 +709,19 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||
|
||||
|
||||
private Map<File, SubtitleDescriptor> lookupSubtitleByFileName(SubtitleProvider service, Collection<String> querySet, Language language, Collection<File> videoFiles, boolean strict) throws Exception {
|
||||
Map<File, SubtitleDescriptor> subtitleByVideo = new HashMap<File, SubtitleDescriptor>();
|
||||
|
||||
// search for subtitles
|
||||
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, language.getName());
|
||||
|
||||
// match subtitle files to video files
|
||||
if (subtitles.size() > 0) {
|
||||
// first match everything as best as possible, then filter possibly bad matches
|
||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(videoFiles, subtitles, false, EpisodeMetrics.defaultSequence(true));
|
||||
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
||||
|
||||
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
||||
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.5f)) {
|
||||
CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getValue().getName(), it.getCandidate().getName()));
|
||||
subtitleByVideo.put(it.getValue(), it.getCandidate());
|
||||
}
|
||||
Map<File, SubtitleDescriptor> subtitleByVideo = matchSubtitles(videoFiles, subtitles, strict);
|
||||
for (Entry<File, SubtitleDescriptor> it : subtitleByVideo.entrySet()) {
|
||||
CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getKey().getName(), it.getValue().getName()));
|
||||
}
|
||||
return subtitleByVideo;
|
||||
}
|
||||
|
||||
return subtitleByVideo;
|
||||
return emptyMap();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -165,13 +165,13 @@ import net.sourceforge.filebot.similarity.*
|
|||
|
||||
def parseEpisodeNumber(path, strict = true) {
|
||||
def input = path instanceof File ? path.name : path.toString()
|
||||
def sxe = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(input)
|
||||
def sxe = MediaDetection.parseEpisodeNumber(input, strict)
|
||||
return sxe == null || sxe.isEmpty() ? null : sxe[0]
|
||||
}
|
||||
|
||||
def parseDate(path) {
|
||||
def input = path instanceof File ? path.name : path.toString()
|
||||
return new DateMetric().parse(input)
|
||||
return MediaDetection.parseDate(input)
|
||||
}
|
||||
|
||||
def detectSeriesName(files, locale = Locale.ENGLISH) {
|
||||
|
|
|
@ -41,13 +41,16 @@ import net.sourceforge.filebot.MediaTypes;
|
|||
import net.sourceforge.filebot.WebServices;
|
||||
import net.sourceforge.filebot.similarity.CommonSequenceMatcher;
|
||||
import net.sourceforge.filebot.similarity.DateMatcher;
|
||||
import net.sourceforge.filebot.similarity.DateMetric;
|
||||
import net.sourceforge.filebot.similarity.MetricAvg;
|
||||
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher;
|
||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
|
||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
||||
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
||||
import net.sourceforge.filebot.web.Date;
|
||||
import net.sourceforge.filebot.web.Movie;
|
||||
import net.sourceforge.filebot.web.MovieIdentificationService;
|
||||
import net.sourceforge.filebot.web.SearchResult;
|
||||
|
@ -73,6 +76,21 @@ public class MediaDetection {
|
|||
}
|
||||
|
||||
|
||||
public static boolean isEpisode(String name, boolean strict) {
|
||||
return parseEpisodeNumber(name, strict) != null || parseDate(name) != null;
|
||||
}
|
||||
|
||||
|
||||
public static List<SxE> parseEpisodeNumber(String string, boolean strict) {
|
||||
return new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(string);
|
||||
}
|
||||
|
||||
|
||||
public static Date parseDate(Object object) {
|
||||
return new DateMetric().parse(object);
|
||||
}
|
||||
|
||||
|
||||
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
|
||||
// map series names by folder
|
||||
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
|
||||
|
|
|
@ -330,7 +330,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
name = normalizePunctuation(name);
|
||||
|
||||
// normalize to lower case
|
||||
name.toLowerCase();
|
||||
name = name.toLowerCase();
|
||||
|
||||
transformCache.put(object, name);
|
||||
return name;
|
||||
|
|
|
@ -3,7 +3,10 @@ package net.sourceforge.filebot.subtitle;
|
|||
|
||||
|
||||
import static java.lang.Math.*;
|
||||
import static java.util.Arrays.*;
|
||||
import static java.util.Collections.*;
|
||||
import static net.sourceforge.filebot.MediaTypes.*;
|
||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -16,12 +19,19 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
||||
import net.sourceforge.filebot.similarity.Match;
|
||||
import net.sourceforge.filebot.similarity.Matcher;
|
||||
import net.sourceforge.filebot.similarity.MetricAvg;
|
||||
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
||||
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
|
||||
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
||||
import net.sourceforge.filebot.ui.Language;
|
||||
import net.sourceforge.filebot.vfs.ArchiveType;
|
||||
|
@ -33,13 +43,35 @@ import net.sourceforge.filebot.web.SubtitleProvider;
|
|||
|
||||
public final class SubtitleUtilities {
|
||||
|
||||
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
|
||||
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
||||
|
||||
SimilarityMetric[] metrics = EpisodeMetrics.defaultSequence(false);
|
||||
|
||||
// optimize for generic media <-> subtitle matching
|
||||
replaceAll(asList(metrics), EpisodeMetrics.SubstringFields, EpisodeMetrics.SubstringSequence);
|
||||
|
||||
// first match everything as best as possible, then filter possibly bad matches
|
||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
||||
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
||||
|
||||
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
||||
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.5f)) {
|
||||
subtitleByVideo.put(it.getValue(), it.getCandidate());
|
||||
}
|
||||
}
|
||||
|
||||
return subtitleByVideo;
|
||||
}
|
||||
|
||||
|
||||
public static List<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, String languageName) throws Exception {
|
||||
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>();
|
||||
|
||||
// search for and automatically select movie / show entry
|
||||
Set<SearchResult> resultSet = new HashSet<SearchResult>();
|
||||
for (String query : querySet) {
|
||||
resultSet.addAll(findProbableMatches(query, service.search(query), 0.9f));
|
||||
resultSet.addAll(findProbableSearchResults(query, service.search(query)));
|
||||
}
|
||||
|
||||
// fetch subtitles for all search results
|
||||
|
@ -50,17 +82,17 @@ public final class SubtitleUtilities {
|
|||
return subtitles;
|
||||
}
|
||||
|
||||
|
||||
protected static Collection<SearchResult> findProbableMatches(String query, Iterable<? extends SearchResult> searchResults, float threshold) {
|
||||
|
||||
protected static Collection<SearchResult> findProbableSearchResults(String query, Iterable<? extends SearchResult> searchResults) {
|
||||
// auto-select most probable search result
|
||||
Set<SearchResult> probableMatches = new LinkedHashSet<SearchResult>();
|
||||
|
||||
// use name similarity metric
|
||||
SimilarityMetric metric = new NameSimilarityMetric();
|
||||
SimilarityMetric metric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric());
|
||||
|
||||
// find probable matches using name similarity > threshold
|
||||
for (SearchResult result : searchResults) {
|
||||
if (metric.getSimilarity(query, result.getName()) > threshold) {
|
||||
if (metric.getSimilarity(query, removeTrailingBrackets(result.getName())) > 0.8f) {
|
||||
probableMatches.add(result);
|
||||
}
|
||||
}
|
||||
|
@ -68,7 +100,7 @@ public final class SubtitleUtilities {
|
|||
return probableMatches;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Detect charset and parse subtitle file even if extension is invalid
|
||||
*/
|
||||
|
@ -108,7 +140,7 @@ public final class SubtitleUtilities {
|
|||
throw new IOException("Cannot read subtitle format");
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static ByteBuffer exportSubtitles(MemoryFile data, SubtitleFormat outputFormat, long outputTimingOffset, Charset outputEncoding) throws IOException {
|
||||
if (outputFormat != null && outputFormat != SubtitleFormat.SubRip) {
|
||||
throw new IllegalArgumentException("Format not supported");
|
||||
|
@ -134,7 +166,7 @@ public final class SubtitleUtilities {
|
|||
return outputEncoding.encode(getText(data.getData()));
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static SubtitleFormat getSubtitleFormat(File file) {
|
||||
for (SubtitleFormat it : SubtitleFormat.values()) {
|
||||
if (it.getFilter().accept(file))
|
||||
|
@ -144,7 +176,7 @@ public final class SubtitleUtilities {
|
|||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static SubtitleFormat getSubtitleFormatByName(String name) {
|
||||
for (SubtitleFormat it : SubtitleFormat.values()) {
|
||||
// check by name
|
||||
|
@ -159,7 +191,7 @@ public final class SubtitleUtilities {
|
|||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static String formatSubtitle(String name, String languageName, String type) {
|
||||
StringBuilder sb = new StringBuilder(name);
|
||||
|
||||
|
@ -181,7 +213,7 @@ public final class SubtitleUtilities {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static MemoryFile fetchSubtitle(SubtitleDescriptor descriptor) throws Exception {
|
||||
ByteBuffer data = descriptor.fetch();
|
||||
|
||||
|
@ -203,7 +235,7 @@ public final class SubtitleUtilities {
|
|||
return new MemoryFile(descriptor.getPath(), data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Dummy constructor to prevent instantiation.
|
||||
*/
|
||||
|
|
|
@ -61,10 +61,10 @@ import net.miginfocom.swing.MigLayout;
|
|||
import net.sourceforge.filebot.Analytics;
|
||||
import net.sourceforge.filebot.ResourceManager;
|
||||
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
||||
import net.sourceforge.filebot.similarity.Match;
|
||||
import net.sourceforge.filebot.similarity.Matcher;
|
||||
import net.sourceforge.filebot.similarity.MetricCascade;
|
||||
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
||||
import net.sourceforge.filebot.vfs.MemoryFile;
|
||||
import net.sourceforge.filebot.web.Movie;
|
||||
import net.sourceforge.filebot.web.SubtitleDescriptor;
|
||||
import net.sourceforge.filebot.web.SubtitleProvider;
|
||||
import net.sourceforge.filebot.web.VideoHashSubtitleService;
|
||||
|
@ -437,7 +437,7 @@ class SubtitleAutoMatchDialog extends JDialog {
|
|||
}
|
||||
if (f < 0.9f) {
|
||||
setOpaque(true);
|
||||
setBackground(derive(Color.RED, 1 - (f * 0.75f)));
|
||||
setBackground(derive(Color.RED, (1 - f) * 0.5f));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -968,20 +968,32 @@ class SubtitleAutoMatchDialog extends JDialog {
|
|||
|
||||
@Override
|
||||
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
|
||||
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
|
||||
for (File file : files) {
|
||||
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
|
||||
}
|
||||
// ignore clutter files from processing
|
||||
files = filter(files, NON_CLUTTER_FILES);
|
||||
|
||||
// auto-detect query and search for subtitles
|
||||
Collection<String> querySet = detectSeriesNames(files, Locale.ENGLISH);
|
||||
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||
|
||||
// auto-detect series names
|
||||
querySet.addAll(detectSeriesNames(files, Locale.ROOT));
|
||||
|
||||
// auto-detect movie names
|
||||
for (File f : files) {
|
||||
if (!isEpisode(f.getName(), false)) {
|
||||
for (Movie movie : detectMovie(f, null, null, Locale.ROOT, false)) {
|
||||
querySet.add(movie.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
|
||||
|
||||
// if auto-detection fails, ask user for input
|
||||
if (subtitles.isEmpty()) {
|
||||
// dialog may have been cancelled by now
|
||||
if (Thread.interrupted())
|
||||
if (Thread.interrupted()) {
|
||||
throw new CancellationException();
|
||||
}
|
||||
|
||||
querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent);
|
||||
subtitles = findSubtitles(service, querySet, languageName);
|
||||
|
@ -992,18 +1004,20 @@ class SubtitleAutoMatchDialog extends JDialog {
|
|||
}
|
||||
}
|
||||
|
||||
// first match everything as best as possible, then filter possibly bad matches
|
||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, EpisodeMetrics.defaultSequence(true));
|
||||
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
||||
// files by possible subtitles matches
|
||||
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
|
||||
for (File file : files) {
|
||||
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
|
||||
}
|
||||
|
||||
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
||||
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= 1) {
|
||||
subtitlesByFile.get(it.getValue()).add(it.getCandidate());
|
||||
}
|
||||
// first match everything as best as possible, then filter possibly bad matches
|
||||
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, false).entrySet()) {
|
||||
subtitlesByFile.get(it.getKey()).add(it.getValue());
|
||||
}
|
||||
|
||||
// add other possible matches to the options
|
||||
float minMatchSimilarity = 0.6f;
|
||||
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
||||
float minMatchSimilarity = 0.5f;
|
||||
|
||||
for (File file : files) {
|
||||
// add matching subtitles
|
||||
|
@ -1020,7 +1034,8 @@ class SubtitleAutoMatchDialog extends JDialog {
|
|||
|
||||
@Override
|
||||
public float getMatchProbabilty(File videoFile, SubtitleDescriptor descriptor) {
|
||||
return EpisodeMetrics.verificationMetric().getSimilarity(videoFile, descriptor) * 0.9f;
|
||||
SimilarityMetric metric = new MetricCascade(EpisodeMetrics.SeasonEpisode, EpisodeMetrics.AirDate, EpisodeMetrics.Name);
|
||||
return 0.9f * metric.getSimilarity(videoFile, descriptor);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue