* rewrite subtitle auto-selection (should work much better now for both strict and non-strict modes)

This commit is contained in:
Reinhard Pointner 2014-01-24 16:01:37 +00:00
parent f5b4dbee19
commit 54d4dad955
6 changed files with 172 additions and 164 deletions

View File

@ -639,18 +639,24 @@ public class CmdlineOperations implements CmdlineInterface {
final Language language = getLanguage(languageName);
final Pattern databaseFilter = (db != null) ? Pattern.compile(db, Pattern.CASE_INSENSITIVE) : null;
final SubtitleNaming naming = getSubtitleNaming(format);
CLILogger.finest(String.format("Get [%s] subtitles for %d files", language.getName(), files.size()));
// when rewriting subtitles to target format an encoding must be defined, default to UTF-8
final Charset outputEncoding = (csn != null) ? Charset.forName(csn) : (output != null) ? Charset.forName("UTF-8") : null;
final SubtitleFormat outputFormat = (output != null) ? getSubtitleFormatByName(output) : null;
// ignore anything that is not a video
files = filter(files, VIDEO_FILES);
// ignore clutter files from processing
files = filter(files, not(getClutterFileFilter()));
// try to find subtitles for each video file
List<File> remainingVideos = new ArrayList<File>(filter(files, VIDEO_FILES));
List<File> remainingVideos = new ArrayList<File>(files);
// parallel download
List<File> subtitleFiles = new ArrayList<File>();
CLILogger.finest(String.format("Get [%s] subtitles for %d files", language.getName(), remainingVideos.size()));
if (remainingVideos.isEmpty()) {
throw new Exception("No video files: " + files);
}
@ -672,49 +678,24 @@ public class CmdlineOperations implements CmdlineInterface {
}
}
// lookup subtitles via text search, only perform hash lookup in strict mode
if (!remainingVideos.isEmpty()) {
// auto-detect search query
Set<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
if (query == null) {
try {
List<File> videoFiles = filter(files, VIDEO_FILES);
querySet.addAll(detectSeriesNames(videoFiles, true, false, language.getLocale()));
// auto-detect movie names
for (File f : videoFiles) {
if (!isEpisode(f.getName(), false)) {
for (Movie movie : detectMovie(f, null, null, language.getLocale(), strict)) {
querySet.add(movie.getName());
}
}
}
} catch (Exception e) {
CLILogger.warning("Movie detection failed: " + e.getMessage());
}
if (querySet.isEmpty()) {
throw new Exception("Failed to auto-detect query");
}
} else {
querySet.add(query);
for (SubtitleProvider service : getSubtitleProviders()) {
if (remainingVideos.isEmpty() || (databaseFilter != null && !databaseFilter.matcher(service.getName()).matches())) {
continue;
}
for (SubtitleProvider service : getSubtitleProviders()) {
if (remainingVideos.isEmpty() || (databaseFilter != null && !databaseFilter.matcher(service.getName()).matches())) {
continue;
}
try {
CLILogger.fine(format("Searching for %s at [%s]", querySet, service.getName()));
Map<File, SubtitleDescriptor> subtitles = lookupSubtitleByFileName(service, querySet, language, remainingVideos, strict);
Map<File, File> downloads = downloadSubtitleBatch(service.getName(), subtitles, outputFormat, outputEncoding, naming);
remainingVideos.removeAll(downloads.keySet());
subtitleFiles.addAll(downloads.values());
} catch (Exception e) {
CLILogger.warning(format("Search for %s failed: %s", querySet, e.getMessage()));
try {
CLILogger.fine(format("Looking up subtitles by name via %s", service.getName()));
Map<File, SubtitleDescriptor> subtitles = new TreeMap<File, SubtitleDescriptor>();
for (Entry<File, List<SubtitleDescriptor>> it : findSubtitleMatches(service, remainingVideos, language.getName(), query, false, strict).entrySet()) {
if (it.getValue().size() > 0) {
subtitles.put(it.getKey(), it.getValue().get(0));
}
}
Map<File, File> downloads = downloadSubtitleBatch(service.getName(), subtitles, outputFormat, outputEncoding, naming);
remainingVideos.removeAll(downloads.keySet());
subtitleFiles.addAll(downloads.values());
} catch (Exception e) {
CLILogger.warning(format("Search by name failed: %s", e.getMessage()));
}
}
@ -823,37 +804,21 @@ public class CmdlineOperations implements CmdlineInterface {
}
/**
 * Look up subtitles for the given video files via the hash-based service and keep one
 * subtitle per video.
 *
 * NOTE(review): this listing appears to show removed and added diff lines together (two
 * declarations of subtitleByVideo and of bestMatch); presumably the getBestMatch / TreeMap
 * lines are the post-commit version -- confirm against the actual file.
 *
 * @param service
 *            hash lookup service that is asked for subtitle lists
 * @param language
 *            language whose name is passed to the service
 * @param videoFiles
 *            video files to look up
 * @return map of video file to the selected subtitle
 */
private Map<File, SubtitleDescriptor> lookupSubtitleByHash(VideoHashSubtitleService service, Language language, Collection<File> videoFiles) throws Exception {
Map<File, SubtitleDescriptor> subtitleByVideo = new HashMap<File, SubtitleDescriptor>(videoFiles.size());
Map<File, SubtitleDescriptor> subtitleByVideo = new TreeMap<File, SubtitleDescriptor>();
for (Entry<File, List<SubtitleDescriptor>> it : service.getSubtitleList(videoFiles.toArray(new File[0]), language.getName()).entrySet()) {
// skip videos for which the service returned no list or an empty list
if (it.getValue() != null && it.getValue().size() > 0) {
// guess best hash match (default order is often bad due to invalid hash links)
Entry<File, SubtitleDescriptor> bestMatch = matchSubtitles(singleton(it.getKey()), it.getValue(), false).entrySet().iterator().next();
// guess best hash match (default order is often bad due to invalid hash links)
SubtitleDescriptor bestMatch = getBestMatch(it.getKey(), it.getValue(), false);
CLILogger.finest(format("Matched [%s] to [%s] via filehash", bestMatch.getKey().getName(), bestMatch.getValue().getName()));
subtitleByVideo.put(bestMatch.getKey(), bestMatch.getValue());
// only record the video when a best match was actually selected
if (bestMatch != null) {
CLILogger.finest(format("Matched [%s] to [%s] via filehash", it.getKey().getName(), bestMatch.getName()));
subtitleByVideo.put(it.getKey(), bestMatch);
}
}
return subtitleByVideo;
}
/**
 * Search the given service with the given queries and pair the found subtitles with the
 * given video files.
 *
 * @param service
 *            subtitle provider to search
 * @param querySet
 *            search queries
 * @param language
 *            language whose name is passed to the search
 * @param videoFiles
 *            video files to match subtitles against
 * @param strict
 *            passed on to the subtitle matching
 * @return map of video file to matched subtitle, or an empty map if the search found nothing
 */
private Map<File, SubtitleDescriptor> lookupSubtitleByFileName(SubtitleProvider service, Collection<String> querySet, Language language, Collection<File> videoFiles, boolean strict) throws Exception {
	// search first; without any search results there is nothing to match
	Set<SubtitleDescriptor> candidates = findSubtitles(service, querySet, language.getName());
	if (candidates.isEmpty()) {
		return emptyMap();
	}

	// pair subtitles with videos and log every pairing
	Map<File, SubtitleDescriptor> matches = matchSubtitles(videoFiles, candidates, strict);
	for (Entry<File, SubtitleDescriptor> match : matches.entrySet()) {
		File video = match.getKey();
		SubtitleDescriptor subtitle = match.getValue();
		CLILogger.finest(format("Matched [%s] to [%s] via filename", video.getName(), subtitle.getName()));
	}
	return matches;
}
private <T> List<T> applyExpressionFilter(Collection<T> input, ExpressionFilter filter) throws Exception {
if (filter == null) {
return new ArrayList<T>(input);

View File

@ -33,6 +33,7 @@ import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
@ -955,6 +956,24 @@ public class MediaDetection {
return mediaFolders;
}
/**
 * Group files by the series name detected for each individual file.
 *
 * Series names are detected one file at a time and only the first detected name is used.
 * Files without any detected name are collected under the empty string key. Keys are
 * ordered and compared case-insensitively.
 *
 * @param files
 *            files to group
 * @param useSeriesIndex
 *            passed on to the series name detection
 * @param useAnimeIndex
 *            passed on to the series name detection
 * @param locale
 *            passed on to the series name detection
 * @return map of series name (or empty string) to the files grouped under it
 */
public static Map<String, List<File>> mapBySeriesName(Collection<File> files, boolean useSeriesIndex, boolean useAnimeIndex, Locale locale) throws Exception {
	Map<String, List<File>> filesBySeries = new TreeMap<String, List<File>>(String.CASE_INSENSITIVE_ORDER);

	for (File file : files) {
		// detect per file so that each file ends up in exactly one group
		List<String> detected = detectSeriesNames(singleton(file), useSeriesIndex, useAnimeIndex, locale);

		String seriesName;
		if (detected.isEmpty()) {
			seriesName = "";
		} else {
			seriesName = detected.get(0);
		}

		// create the group on first use, then append
		if (!filesBySeries.containsKey(seriesName)) {
			filesBySeries.put(seriesName, new ArrayList<File>());
		}
		filesBySeries.get(seriesName).add(file);
	}

	return filesBySeries;
}
public static File guessMediaFolder(File file) {
List<File> tail = listPathTail(file, 3, true);

View File

@ -1,6 +1,7 @@
package net.sourceforge.filebot.subtitle;
import static java.lang.Math.*;
import static java.util.Collections.*;
import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.media.MediaDetection.*;
import static net.sourceforge.filebot.similarity.EpisodeMetrics.*;
@ -15,16 +16,22 @@ import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;
import net.sourceforge.filebot.Language;
import net.sourceforge.filebot.similarity.EpisodeMetrics;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.MetricAvg;
@ -34,12 +41,105 @@ import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.vfs.ArchiveType;
import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.SearchResult;
import net.sourceforge.filebot.web.SubtitleDescriptor;
import net.sourceforge.filebot.web.SubtitleProvider;
public final class SubtitleUtilities {
/**
 * Find subtitle candidates for the given video files via name-based search and similarity
 * matching.
 *
 * Non-video files and clutter files are ignored. The remaining files are grouped by media
 * folder and then by detected series name; one search is performed per group and the found
 * subtitles are matched against the files of that group.
 *
 * @param service
 *            subtitle provider to search
 * @param fileSet
 *            files to find subtitles for
 * @param languageName
 *            language name passed on to the subtitle search
 * @param forceQuery
 *            query to use instead of auto-detection; ignored if null or empty
 * @param addOptions
 *            if false, at most one subtitle is collected per file; if true, every subtitle
 *            that passes the filters is collected
 * @param strict
 *            if true, a minimum similarity of 0.9 is required instead of 0.6; also passed on
 *            to movie detection and subtitle matching
 * @return map of each processed file to its (possibly empty) list of subtitles; the subtitle
 *         selected by matchSubtitles comes first if it passes the similarity check
 * @throws InterruptedException
 *             if the current thread is found to be interrupted after a search completes
 */
public static Map<File, List<SubtitleDescriptor>> findSubtitleMatches(SubtitleProvider service, Collection<File> fileSet, String languageName, String forceQuery, boolean addOptions, boolean strict) throws Exception {
	// ignore anything that is not a video
	fileSet = filter(fileSet, VIDEO_FILES);

	// ignore clutter files from processing
	fileSet = filter(fileSet, not(getClutterFileFilter()));

	// the similarity threshold only depends on the mode, not on the group being processed
	final float minMatchSimilarity = strict ? 0.9f : 0.6f;

	// collect results
	Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();

	for (List<File> byMediaFolder : mapByMediaFolder(fileSet).values()) {
		for (Entry<String, List<File>> bySeries : mapBySeriesName(byMediaFolder, true, false, Locale.ENGLISH).entrySet()) {
			List<File> files = bySeries.getValue();

			// auto-detect query and search for subtitles
			Collection<String> querySet = detectSubtitleQuerySet(files, bySeries.getKey(), forceQuery, strict);
			Set<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);

			// dialog may have been cancelled by now
			if (Thread.interrupted()) {
				throw new InterruptedException();
			}

			// files by possible subtitles matches
			for (File file : files) {
				subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
			}

			SimilarityMetric sanity = EpisodeMetrics.verificationMetric();

			// first match everything as best as possible, then filter possibly bad matches
			for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, strict).entrySet()) {
				if (sanity.getSimilarity(it.getKey(), it.getValue()) >= minMatchSimilarity) {
					subtitlesByFile.get(it.getKey()).add(it.getValue());
				}
			}

			// this could be very slow, lets hope at this point there is not much left due to positive hash matches
			for (File file : files) {
				List<SubtitleDescriptor> options = subtitlesByFile.get(file);

				// add other possible matches to the options
				for (SubtitleDescriptor it : subtitles) {
					// grab only the first best option unless we really want all options;
					// the list never shrinks, so once this holds nothing more can be added for this file
					if (!addOptions && options.size() >= 1) {
						break;
					}

					// ignore if it's already been added
					if (options.contains(it)) {
						continue;
					}

					// ignore if we're sure that SxE is a negative match
					if (isEpisode(it.getName(), true) && isEpisode(file.getPath(), true) && EpisodeMetrics.EpisodeFunnel.getSimilarity(file, it) < 1) {
						continue;
					}

					// ignore if it's not similar enough
					if (sanity.getSimilarity(file, it) < minMatchSimilarity) {
						continue;
					}

					options.add(it);
				}
			}
		}
	}

	return subtitlesByFile;
}

/**
 * Determine the search queries for one group of files: the forced query if given, otherwise
 * the detected series name, otherwise per-file movie names with the stripped file name as
 * fallback.
 */
private static Collection<String> detectSubtitleQuerySet(List<File> files, String seriesName, String forceQuery, boolean strict) throws Exception {
	Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);

	if (forceQuery != null && forceQuery.length() > 0) {
		// an explicit query always wins over auto-detection
		querySet.add(forceQuery);
	} else if (seriesName.length() > 0) {
		// use auto-detected series name as query
		querySet.add(seriesName);
	} else {
		for (File f : files) {
			List<String> queries = new ArrayList<String>();

			// might be a movie, auto-detect movie names
			if (!isEpisode(f.getPath(), true)) {
				for (Movie it : detectMovie(f, null, null, Locale.ENGLISH, strict)) {
					queries.add(it.getName());
				}
			}

			// nothing detected, fall back to the file name without release info
			if (queries.isEmpty()) {
				queries.add(stripReleaseInfo(getName(f)));
			}

			querySet.addAll(queries);
		}
	}

	return querySet;
}
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
@ -106,6 +206,20 @@ public final class SubtitleUtilities {
return probableMatches;
}
/**
 * Select the subtitle that matchSubtitles pairs with the given file.
 *
 * @param file
 *            file to find a subtitle for
 * @param subtitles
 *            subtitles to choose from
 * @param strict
 *            passed on to the subtitle matching
 * @return the matched subtitle, or null if file or subtitles is null, subtitles is empty, or
 *         no match was found
 * @throws RuntimeException
 *             wrapping the InterruptedException if matching is interrupted; the thread's
 *             interrupt status is restored before throwing
 */
public static SubtitleDescriptor getBestMatch(File file, Collection<SubtitleDescriptor> subtitles, boolean strict) {
	if (file == null || subtitles == null || subtitles.isEmpty()) {
		return null;
	}

	try {
		Map<File, SubtitleDescriptor> matches = matchSubtitles(singleton(file), subtitles, strict);

		// check explicitly instead of relying on an exception for the no-match case
		if (matches.isEmpty()) {
			return null;
		}
		return matches.entrySet().iterator().next().getValue();
	} catch (NoSuchElementException e) {
		// kept for compatibility: any NoSuchElementException raised during matching has always meant "no match"
		return null;
	} catch (InterruptedException e) {
		// restore the interrupt status so that callers further up can still observe the interruption
		Thread.currentThread().interrupt();
		throw new RuntimeException(e);
	}
}
/**
* Detect charset and parse subtitle file even if extension is invalid
*/

View File

@ -1,12 +1,9 @@
package net.sourceforge.filebot.ui.subtitle;
import static java.util.Collections.*;
import static javax.swing.BorderFactory.*;
import static javax.swing.JOptionPane.*;
import static net.sourceforge.filebot.media.MediaDetection.*;
import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
import static net.sourceforge.tuned.FileUtilities.*;
import static net.sourceforge.tuned.StringUtilities.*;
import static net.sourceforge.tuned.ui.TunedUtilities.*;
import java.awt.Color;
@ -26,7 +23,6 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@ -67,7 +63,6 @@ import net.sourceforge.filebot.similarity.MetricCascade;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.subtitle.SubtitleNaming;
import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.SubtitleDescriptor;
import net.sourceforge.filebot.web.SubtitleProvider;
import net.sourceforge.filebot.web.VideoHashSubtitleService;
@ -192,18 +187,6 @@ class SubtitleAutoMatchDialog extends JDialog {
servicePanel.add(component);
}
// remember last user input
private List<String> userQuery = new ArrayList<String>();
/**
 * Return the remembered user query, showing the input dialog first if nothing has been
 * remembered yet. Access to the remembered query is synchronized on the list itself.
 *
 * @param suggestion
 *            initial value passed to the input dialog
 * @param title
 *            title passed to the input dialog
 * @param parent
 *            parent component passed to the input dialog
 * @return the remembered list of user-entered names
 */
protected List<String> getUserQuery(String suggestion, String title, Component parent) throws Exception {
	synchronized (userQuery) {
		// reuse earlier input so the dialog is not shown again
		if (!userQuery.isEmpty()) {
			return userQuery;
		}

		userQuery.addAll(showMultiValueInputDialog("Enter series / movie names:", suggestion, title, parent));
		return userQuery;
	}
}
public void startQuery(String languageName) {
final SubtitleMappingTableModel mappingModel = (SubtitleMappingTableModel) subtitleMappingTable.getModel();
QueryTask queryTask = new QueryTask(services, mappingModel.getVideoFiles(), languageName, SubtitleAutoMatchDialog.this) {
@ -735,10 +718,11 @@ class SubtitleAutoMatchDialog extends JDialog {
Set<SubtitleDescriptor> subtitlesByRelevance = new LinkedHashSet<SubtitleDescriptor>();
// guess best hash match (default order is open bad due to invalid hash links)
if (result.getValue().size() > 0) {
Entry<File, SubtitleDescriptor> bestMatch = matchSubtitles(singleton(result.getKey()), result.getValue(), false).entrySet().iterator().next();
subtitlesByRelevance.add(bestMatch.getValue());
SubtitleDescriptor bestMatch = getBestMatch(result.getKey(), result.getValue(), false);
if (bestMatch != null) {
subtitlesByRelevance.add(bestMatch);
}
subtitlesByRelevance.addAll(result.getValue());
// associate subtitles with services
@ -924,72 +908,7 @@ class SubtitleAutoMatchDialog extends JDialog {
/**
 * Collect possible subtitles for each of the given files using the name-based search of this
 * task's service.
 *
 * NOTE(review): this listing appears to show removed and added diff lines together (two
 * consecutive return statements at the end); presumably the post-commit body is only the
 * final delegation to findSubtitleMatches -- confirm against the actual file.
 *
 * @param fileSet
 *            files to find subtitles for
 * @param languageName
 *            language name passed on to the subtitle search
 * @param parent
 *            parent component passed to the user query dialog
 * @return map of file to list of possible subtitles
 */
@Override
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> fileSet, String languageName, Component parent) throws Exception {
// ignore clutter files from processing
fileSet = filter(fileSet, not(getClutterFileFilter()));
// collect results
Map<File, List<SubtitleDescriptor>> subtitlesByFile = new HashMap<File, List<SubtitleDescriptor>>();
// one query set and one search per media folder
for (List<File> files : mapByMediaFolder(fileSet).values()) {
// auto-detect query and search for subtitles
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
// auto-detect series names
querySet.addAll(detectSeriesNames(files, true, false, Locale.ROOT));
// auto-detect movie names
for (File f : files) {
if (!isEpisode(f.getName(), false)) {
for (Movie movie : detectMovie(f, null, null, Locale.ROOT, false)) {
querySet.add(movie.getName());
}
}
}
Set<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
// dialog may have been cancelled by now
if (Thread.interrupted()) {
throw new CancellationException();
}
// if auto-detection fails, ask user for input
if (subtitles.isEmpty()) {
querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent);
subtitles = findSubtitles(service, querySet, languageName);
// still no luck... nothing more we can do
if (subtitles.isEmpty()) {
throw new Exception("Unable to lookup subtitles: " + querySet);
}
}
// files by possible subtitles matches
for (File file : files) {
subtitlesByFile.put(file, new ArrayList<SubtitleDescriptor>());
}
// first match everything as best as possible, then filter possibly bad matches
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, false).entrySet()) {
subtitlesByFile.get(it.getKey()).add(it.getValue());
}
// add other possible matches to the options
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
float minMatchSimilarity = 0.5f;
// this could be very slow, lets hope at this point there is not much left due to positive hash matches
for (File file : files) {
// add matching subtitles
for (SubtitleDescriptor it : subtitles) {
// skip subtitles already listed for this file and those below the similarity threshold
if (!subtitlesByFile.get(file).contains(it) && sanity.getSimilarity(file, it) >= minMatchSimilarity) {
subtitlesByFile.get(file).add(it);
}
}
}
}
return subtitlesByFile;
// delegate to the shared implementation: no forced query, collect all options, non-strict
return findSubtitleMatches(service, fileSet, languageName, null, true, false);
}
@Override

View File

@ -16,6 +16,7 @@ import java.awt.geom.Path2D;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
@ -36,12 +37,9 @@ import net.sourceforge.filebot.Language;
import net.sourceforge.filebot.ResourceManager;
import net.sourceforge.filebot.Settings;
import net.sourceforge.filebot.WebServices;
import net.sourceforge.filebot.media.MediaDetection;
import net.sourceforge.filebot.similarity.Normalization;
import net.sourceforge.filebot.ui.AbstractSearchPanel;
import net.sourceforge.filebot.ui.LanguageComboBox;
import net.sourceforge.filebot.ui.SelectDialog;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.OpenSubtitlesClient;
import net.sourceforge.filebot.web.SearchResult;
import net.sourceforge.filebot.web.SubtitleDescriptor;
@ -142,16 +140,7 @@ public class SubtitlePanel extends AbstractSearchPanel<SubtitleProvider, Subtitl
};
/**
 * Provide the search history entries for the given engine.
 *
 * NOTE(review): this listing appears to show removed and added diff lines together (two
 * consecutive return statements); presumably the post-commit body only returns the empty
 * list -- confirm against the actual file.
 *
 * @param engine
 *            subtitle provider the history is requested for (not used in the lines shown)
 * @return collection of names
 */
protected Collection<String> getHistory(SubtitleProvider engine) throws Exception {
// list is created with a large initial capacity (200000)
final List<String> names = new ArrayList<String>(200000);
// movie names without year
for (Movie it : MediaDetection.releaseInfo.getMovieList()) {
names.addAll(it.getEffectiveNamesWithoutYear());
}
// TheTVDB index names with trailing brackets removed
for (SearchResult it : MediaDetection.releaseInfo.getTheTVDBIndex()) {
for (String n : it.getEffectiveNames()) {
names.add(Normalization.removeTrailingBrackets(n));
}
}
return names;
return Collections.emptyList();
};
@Override

View File

@ -92,7 +92,8 @@
^Romance$
^rtorrent$
^Science.Fiction$
^Scratch$
^scratch$
^scratch.area$
^Season$
^Seeding$
^Seeds$
@ -136,6 +137,7 @@
^Volumes$
^watch$
^www$
^XXX+$
A.PROCESAR
A.Release.Lounge
ABC