* supercharge movie auto-detection using a local movie index and use it for common-word-sequence matching

* use a language-specific Collator wherever movie names / file names are matched, so we get perfect matching even with accents, umlauts, half-width/full-width characters, etc.
This commit is contained in:
Reinhard Pointner 2012-01-02 03:48:24 +00:00
parent 6707a94518
commit 90cc0a06fa
12 changed files with 270 additions and 96 deletions

View File

@ -98,7 +98,7 @@ public class CmdlineOperations implements CmdlineInterface {
int cws = 0; // common word sequence
double max = mediaFiles.size();
SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(getLenientCollator(locale));
Collection<String> cwsList = emptySet();
if (max >= 5) {
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
@ -137,7 +137,7 @@ public class CmdlineOperations implements CmdlineInterface {
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
// auto-determine optimal batch sets
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles).entrySet()) {
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles, locale).entrySet()) {
List<List<File>> batchSets = new ArrayList<List<File>>();
if (sameSeriesGroup.getValue() != null && sameSeriesGroup.getValue().size() > 0) {
@ -150,7 +150,7 @@ public class CmdlineOperations implements CmdlineInterface {
for (List<File> batch : batchSets) {
// auto-detect series name if not given
Collection<String> seriesNames = (query == null) ? detectQuery(batch, strict) : singleton(query);
Collection<String> seriesNames = (query == null) ? detectQuery(batch, locale, strict) : singleton(query);
// fetch episode data
Set<Episode> episodes = fetchEpisodeSet(db, seriesNames, locale, strict);
@ -297,6 +297,7 @@ public class CmdlineOperations implements CmdlineInterface {
// unknown hash, try via imdb id from nfo file
if (movie == null) {
CLILogger.fine(format("Auto-detect movie from context: [%s]", movieFiles[i]));
Collection<Movie> results = detectMovie(movieFiles[i], null, service, locale, strict);
movie = (Movie) selectSearchResult(query, results, strict).get(0);
@ -463,7 +464,7 @@ public class CmdlineOperations implements CmdlineInterface {
// lookup subtitles via text search, only perform hash lookup in strict mode
if ((query != null || !strict) && !collector.isComplete()) {
// auto-detect search query
Collection<String> querySet = (query == null) ? detectQuery(filter(files, VIDEO_FILES), false) : singleton(query);
Collection<String> querySet = (query == null) ? detectQuery(filter(files, VIDEO_FILES), language.toLocale(), false) : singleton(query);
for (SubtitleProvider service : WebServices.getSubtitleProviders()) {
if (collector.isComplete()) {
@ -618,9 +619,9 @@ public class CmdlineOperations implements CmdlineInterface {
}
private List<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception {
private List<String> detectQuery(Collection<File> mediaFiles, Locale locale, boolean strict) throws Exception {
// detect series name by common word sequence
List<String> names = detectSeriesNames(mediaFiles);
List<String> names = detectSeriesNames(mediaFiles, locale);
if (names.isEmpty() || (strict && names.size() > 1)) {
throw new Exception("Unable to auto-select query: " + names);

View File

@ -10,6 +10,7 @@ import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
@ -24,6 +25,9 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
@ -42,15 +46,18 @@ import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
public class MediaDetection {
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files) throws Exception {
private static ReleaseInfo releaseInfo = new ReleaseInfo();
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
SortedMap<File, List<File>> filesByFolder = mapByFolder(filter(files, VIDEO_FILES, SUBTITLE_FILES));
// map series names by folder
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
for (Entry<File, List<File>> it : filesByFolder.entrySet()) {
Set<String> namesForFolder = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
namesForFolder.addAll(detectSeriesNames(it.getValue()));
Set<String> namesForFolder = new TreeSet<String>(getLenientCollator(locale));
namesForFolder.addAll(detectSeriesNames(it.getValue(), locale));
seriesNamesByFolder.put(it.getKey(), namesForFolder);
}
@ -74,7 +81,7 @@ public class MediaDetection {
Map<Set<File>, Set<String>> batchSets = new HashMap<Set<File>, Set<String>>();
while (seriesNamesByFolder.size() > 0) {
Set<String> combinedNameSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
Set<String> combinedNameSet = new TreeSet<String>(getLenientCollator(locale));
Set<File> combinedFolderSet = new HashSet<File>();
// build combined match set
@ -116,12 +123,12 @@ public class MediaDetection {
}
public static List<String> detectSeriesNames(Collection<File> files) throws Exception {
public static List<String> detectSeriesNames(Collection<File> files, Locale locale) throws Exception {
// don't allow duplicates
Map<String, String> names = new LinkedHashMap<String, String>();
try {
for (SearchResult it : lookupSeriesNameByInfoFile(files, Locale.ENGLISH)) {
for (SearchResult it : lookupSeriesNameByInfoFile(files, locale)) {
names.put(it.getName().toLowerCase(), it.getName());
}
} catch (Exception e) {
@ -129,10 +136,10 @@ public class MediaDetection {
}
// match common word sequence and clean detected word sequence from unwanted elements
Collection<String> matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
Collection<String> matches = new SeriesNameMatcher(getLenientCollator(locale)).matchAll(files.toArray(new File[files.size()]));
try {
matches = stripReleaseInfo(matches);
matches = stripReleaseInfo(matches, true);
} catch (Exception e) {
Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to clean matches: " + e.getMessage(), e);
}
@ -148,6 +155,7 @@ public class MediaDetection {
public static Collection<Movie> detectMovie(File movieFile, MovieIdentificationService hashLookupService, MovieIdentificationService queryLookupService, Locale locale, boolean strict) throws Exception {
Set<Movie> options = new LinkedHashSet<Movie>();
// lookup by file hash
if (hashLookupService != null) {
for (Movie movie : hashLookupService.getMovieDescriptors(new File[] { movieFile }, locale)) {
if (movie != null) {
@ -156,29 +164,102 @@ public class MediaDetection {
}
}
// lookup by id from nfo file
if (queryLookupService != null) {
// try to grep imdb id from nfo files
for (int imdbid : grepImdbIdFor(movieFile)) {
Movie movie = queryLookupService.getMovieDescriptor(imdbid, locale);
if (movie != null) {
options.add(movie);
}
}
}
if (queryLookupService != null && !strict && options.isEmpty()) {
// search by file name or folder name
Collection<String> searchQueries = new LinkedHashSet<String>();
searchQueries.add(getName(movieFile));
searchQueries.add(getName(movieFile.getParentFile()));
List<String> files = new ArrayList<String>();
files.add(getName(movieFile));
files.add(getName(movieFile.getParentFile()));
long t = System.currentTimeMillis();
List<Movie> movieNameMatches = matchMovieName(files, locale, strict);
System.out.println(System.currentTimeMillis() - t);
// skip further queries if collected matches are already sufficient
if (options.size() > 0 && movieNameMatches.size() > 0) {
options.addAll(movieNameMatches);
return options;
}
// continue gathering more matches if possible
options.addAll(movieNameMatches);
// query by file / folder name
if (queryLookupService != null && !strict) {
options.addAll(queryMovieByFileName(files, queryLookupService, locale));
}
return options;
}
/**
 * Cross-references the given file / folder names with the local movie index.
 *
 * A movie is a candidate when the common word sequence between a given name and the movie
 * name is at least as long as the movie name itself. If the same holds for "name year",
 * the match is recorded as a strict match; otherwise it is only recorded when
 * <code>strict</code> is false.
 *
 * @param files file and/or folder names that are matched against the movie index
 * @param locale currently not used by this method
 * @param strict if true, only matches that include the movie's year are accepted
 * @return matching movies, ordered by the length of the matched word sequence (longest first)
 * @throws Exception if the movie index cannot be loaded or the calling thread is interrupted while waiting
 */
private static List<Movie> matchMovieName(final List<String> files, final Locale locale, final boolean strict) throws Exception {
	// use simple comparator for speed (2-3x faster)
	final SeriesNameMatcher nameMatcher = new SeriesNameMatcher(String.CASE_INSENSITIVE_ORDER);
	final Map<Movie, String> matchMap = synchronizedMap(new HashMap<Movie, String>());

	// load the index before creating the pool, so a failed load cannot leave a pool behind
	final Movie[] movieList = releaseInfo.getMovieList();

	ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
	try {
		for (final Movie movie : movieList) {
			executor.submit(new Runnable() {

				@Override
				public void run() {
					for (String name : files) {
						String movieIdentifier = movie.getName();
						String commonName = nameMatcher.matchByFirstCommonWordSequence(name, movieIdentifier);
						if (commonName != null && commonName.length() >= movieIdentifier.length()) {
							String strictMovieIdentifier = movie.getName() + " " + movie.getYear();
							String strictCommonName = nameMatcher.matchByFirstCommonWordSequence(name, strictMovieIdentifier);
							if (strictCommonName != null && strictCommonName.length() >= strictMovieIdentifier.length()) {
								// prefer strict match
								matchMap.put(movie, strictCommonName);
							} else if (!strict) {
								// accept a match on the movie name alone (without year) in non-strict mode
								matchMap.put(movie, commonName);
							}
						}
					}
				}
			});
		}

		// wait for last task to finish
		executor.shutdown();
		executor.awaitTermination(1, TimeUnit.MINUTES);
	} finally {
		// on timeout, interruption or failure: drop queued tasks instead of leaking the pool
		if (!executor.isTerminated()) {
			executor.shutdownNow();
		}
	}

	// work on a private snapshot: after a timeout workers may still be writing to matchMap,
	// and copying a synchronizedMap iterates it, which requires holding its lock
	final Map<Movie, String> snapshot;
	synchronized (matchMap) {
		snapshot = new HashMap<Movie, String>(matchMap);
	}

	// sort by length of name match (descending)
	List<Movie> results = new ArrayList<Movie>(snapshot.keySet());
	sort(results, new Comparator<Movie>() {

		@Override
		public int compare(Movie a, Movie b) {
			return Integer.compare(snapshot.get(b).length(), snapshot.get(a).length());
		}
	});

	return results;
}
private static Collection<Movie> queryMovieByFileName(List<String> files, MovieIdentificationService queryLookupService, Locale locale) throws Exception {
// remove blacklisted terms
searchQueries = stripReleaseInfo(searchQueries);
Set<String> querySet = new LinkedHashSet<String>();
querySet.addAll(stripReleaseInfo(files, true));
querySet.addAll(stripReleaseInfo(files, false));
final SimilarityMetric metric = new NameSimilarityMetric();
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
for (String query : searchQueries) {
for (String query : querySet) {
for (Movie movie : queryLookupService.searchMovie(query, locale)) {
probabilityMap.put(movie, metric.getSimilarity(query, movie));
}
@ -194,20 +275,17 @@ public class MediaDetection {
}
});
options.addAll(results);
}
return options;
return results;
}
public static String stripReleaseInfo(String name) throws IOException {
return new ReleaseInfo().cleanRelease(name);
return releaseInfo.cleanRelease(name, true);
}
public static List<String> stripReleaseInfo(Collection<String> names) throws IOException {
return new ReleaseInfo().cleanRelease(names);
public static List<String> stripReleaseInfo(Collection<String> names, boolean strict) throws IOException {
return releaseInfo.cleanRelease(names, strict);
}
@ -284,4 +362,13 @@ public class MediaDetection {
return collection;
}
/**
 * Creates a locale-specific string comparator backed by a {@link Collator}.
 *
 * The collator uses FULL_DECOMPOSITION and TERTIARY strength. Note that TERTIARY is not
 * the strictest setting (that would be IDENTICAL).
 *
 * @param locale the locale whose collation rules are used
 * @return the configured collator, viewed as a comparator for strings
 */
public static Comparator<String> getLenientCollator(Locale locale) {
	final Collator collator = Collator.getInstance(locale);
	collator.setDecomposition(Collator.FULL_DECOMPOSITION);
	collator.setStrength(Collator.TERTIARY);

	// Collator implements Comparator<Object>, so it can compare strings; the cast only narrows the static type
	@SuppressWarnings("unchecked")
	Comparator<String> comparator = (Comparator<String>) (Comparator<?>) collator;
	return comparator;
}
}

View File

@ -4,6 +4,7 @@ package net.sourceforge.filebot.media;
import static java.util.ResourceBundle.*;
import static java.util.regex.Pattern.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.tuned.StringUtilities.*;
import java.io.File;
@ -11,33 +12,40 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import net.sourceforge.filebot.web.CachedResource;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.tuned.ByteBufferInputStream;
public class ReleaseInfo {
public String getVideoSource(File file) {
// check parent and itself for group names
return matchLast(getVideoSourcePattern(), file.getParent(), file.getName());
return matchLast(getVideoSourcePattern(), getBundle(getClass().getName()).getString("pattern.video.source").split("[|]"), file.getParent(), file.getName());
}
public String getReleaseGroup(File file) throws IOException {
// check parent and itself for group names
return matchLast(getReleaseGroupPattern(), file.getParent(), file.getName());
return matchLast(getReleaseGroupPattern(false), releaseGroupResource.get(), file.getParent(), file.getName());
}
protected String matchLast(Pattern pattern, CharSequence... sequence) {
protected String matchLast(Pattern pattern, String[] standardValues, CharSequence... sequence) {
String lastMatch = null;
// match last occurrence
for (CharSequence name : sequence) {
if (name == null)
continue;
@ -48,24 +56,36 @@ public class ReleaseInfo {
}
}
// prefer standard value over matched value
if (lastMatch != null) {
for (String standard : standardValues) {
if (standard.equalsIgnoreCase(lastMatch)) {
return standard;
}
}
}
return lastMatch;
}
public List<String> cleanRelease(Iterable<String> items) throws IOException {
return clean(items, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern());
public List<String> cleanRelease(Iterable<String> items, boolean strict) throws IOException {
return clean(items, getReleaseGroupPattern(strict), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern(false));
}
public String cleanRelease(String item) throws IOException {
return clean(item, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern());
public String cleanRelease(String item, boolean strict) throws IOException {
return clean(item, getReleaseGroupPattern(strict), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern(false));
}
public List<String> clean(Iterable<String> items, Pattern... blacklisted) {
List<String> cleanedItems = new ArrayList<String>();
for (String it : items) {
cleanedItems.add(clean(it, blacklisted));
String cleanedItem = clean(it, blacklisted);
if (cleanedItem.length() > 0) {
cleanedItems.add(cleanedItem);
}
}
return cleanedItems;
@ -77,7 +97,7 @@ public class ReleaseInfo {
item = it.matcher(item).replaceAll("");
}
return item.replaceAll("[\\p{Punct}\\p{Space}]+", " ").trim();
return normalizePunctuation(item);
}
@ -88,14 +108,16 @@ public class ReleaseInfo {
Locale locale = new Locale(code);
tokens.add(locale.getLanguage());
tokens.add(locale.getISO3Language());
tokens.add(locale.getDisplayLanguage(Locale.ENGLISH));
for (Locale language : new HashSet<Locale>(Arrays.asList(Locale.ENGLISH, Locale.getDefault()))) {
tokens.add(locale.getDisplayLanguage(language));
}
}
// remove illegal tokens
tokens.remove("");
// .{language}[.srt]
return compile("(?<=[.])(" + join(tokens, "|") + ")(?=$)", CASE_INSENSITIVE);
return compile("(?<=\\p{Punct})(" + join(tokens, "|") + ")(?=$)", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
}
@ -119,21 +141,27 @@ public class ReleaseInfo {
}
public Pattern getReleaseGroupPattern() throws IOException {
public synchronized Pattern getReleaseGroupPattern(boolean strict) throws IOException {
// pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
}
public Pattern getBlacklistPattern() throws IOException {
public synchronized Pattern getBlacklistPattern(boolean strict) throws IOException {
// pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
}
/**
 * Returns the movie index provided by {@code movieListResource}.
 *
 * The method is synchronized on this instance.
 *
 * @return the movies returned by the underlying cached resource
 * @throws IOException if the underlying resource reports an I/O failure
 */
public synchronized Movie[] getMovieList() throws IOException {
	final Movie[] movieIndex = movieListResource.get();
	return movieIndex;
}
// fetch release group names online and try to update the data every other day
protected final PatternResource releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
protected final PatternResource queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
protected static class PatternResource extends CachedResource<String[]> {
@ -149,4 +177,28 @@ public class ReleaseInfo {
}
}
/**
 * Cached resource that parses a gzip-compressed, UTF-8 encoded movie list.
 *
 * Tokens are separated by tab or newline and are read in the order: id, name, year.
 */
protected static class MovieResource extends CachedResource<Movie[]> {

	public MovieResource(String resource) {
		super(resource, Movie[].class, 24 * 60 * 60 * 1000); // 24h update interval
	}


	/**
	 * Parses the compressed movie list.
	 *
	 * @param data gzip-compressed movie list
	 * @return all movies in the order they appear in the data
	 * @throws IOException if the data cannot be decompressed or read
	 */
	@Override
	public Movie[] process(ByteBuffer data) throws IOException {
		Scanner scanner = new Scanner(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n");

		try {
			List<Movie> movies = new ArrayList<Movie>();
			while (scanner.hasNext()) {
				int imdbid = scanner.nextInt();
				String name = scanner.next();
				int year = scanner.nextInt();
				movies.add(new Movie(name, year, imdbid));
			}

			// Scanner swallows read errors and just reports end of input; surface them instead of
			// silently returning a truncated list
			IOException readError = scanner.ioException();
			if (readError != null) {
				throw readError;
			}

			return movies.toArray(new Movie[0]);
		} catch (java.util.NoSuchElementException e) {
			// a read error in the middle of a record shows up as a parse failure; report the real cause if there is one
			IOException readError = scanner.ioException();
			if (readError != null) {
				throw readError;
			}
			throw e;
		} finally {
			// closes the underlying GZIPInputStream as well
			scanner.close();
		}
	}
}
}

View File

@ -2,10 +2,13 @@
pattern.video.source: CAMRip|CAM|TS|TELESYNC|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R5|R5LINE|DVDRip|DVDR|TVRip|DSR|PDTV|HDTV|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR|WorkPrint|VHS|VCD
# additional release info patterns
pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|ac3|2ch|6ch|ws|hr|720p|1080p
pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|ac3|2ch|6ch|WS|HR|720p|1080p
# group names mostly copied from [http://scenelingo.wordpress.com/list-of-scene-release-groups]
url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt
# blacklisted terms that will be ignored
url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt
# list of all movies (id, name, year)
url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz

View File

@ -3,6 +3,8 @@ package net.sourceforge.filebot.similarity;
import static java.util.Collections.*;
import static java.util.regex.Pattern.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.tuned.StringUtilities.*;
import java.io.File;
@ -28,10 +30,21 @@ import net.sourceforge.tuned.FileUtilities;
public class SeriesNameMatcher {
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
protected final NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
protected final int commonWordSequenceMaxStartIndex = 3;
protected int commonWordSequenceMaxStartIndex = 3;
protected Comparator<String> commonWordComparator;
/**
 * Creates a matcher that uses String.CASE_INSENSITIVE_ORDER as its common word comparator.
 */
public SeriesNameMatcher() {
this(String.CASE_INSENSITIVE_ORDER);
}
/**
 * Creates a matcher that uses the given comparator as its common word comparator.
 *
 * @param comparator stored in commonWordComparator; no null check is performed here
 */
public SeriesNameMatcher(Comparator<String> comparator) {
this.commonWordComparator = comparator;
}
public Collection<String> matchAll(File[] files) {
@ -75,7 +88,7 @@ public class SeriesNameMatcher {
whitelist.addAll(deepMatchAll(focus, threshold));
// 1. use pattern matching
seriesNames.addAll(flatMatchAll(names, Pattern.compile(join(whitelist, "|"), Pattern.CASE_INSENSITIVE), threshold, false));
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ), threshold, false));
// 2. use common word sequences
seriesNames.addAll(whitelist);
@ -92,7 +105,7 @@ public class SeriesNameMatcher {
* threshold
*/
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, String.CASE_INSENSITIVE_ORDER);
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, commonWordComparator);
for (String name : names) {
// use normalized name
@ -191,7 +204,7 @@ public class SeriesNameMatcher {
common = words;
} else {
// find common sequence
common = firstCommonSequence(common, words, commonWordSequenceMaxStartIndex, String.CASE_INSENSITIVE_ORDER);
common = firstCommonSequence(common, words, commonWordSequenceMaxStartIndex, commonWordComparator);
if (common == null) {
// no common sequence
@ -209,14 +222,12 @@ public class SeriesNameMatcher {
protected String normalize(String name) {
// remove group names and checksums, any [...] or (...)
name = name.replaceAll("\\([^\\(]*\\)", " ");
name = name.replaceAll("\\[[^\\[]*\\]", " ");
name = normalizeBrackets(name);
// remove/normalize special characters
name = name.replaceAll("['`´]+", "");
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
name = normalizePunctuation(name);
return name.trim();
return name;
}

View File

@ -173,7 +173,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
List<Callable<List<Match<File, ?>>>> taskPerFolder = new ArrayList<Callable<List<Match<File, ?>>>>();
// detect series names and create episode list fetch tasks
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles).entrySet()) {
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles, locale).entrySet()) {
List<List<File>> batchSets = new ArrayList<List<File>>();
if (sameSeriesGroup.getValue() != null && sameSeriesGroup.getValue().size() > 0) {
@ -219,7 +219,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
// detect series name and fetch episode list
if (autodetection) {
Collection<String> names = detectSeriesNames(files);
Collection<String> names = detectSeriesNames(files, locale);
if (names.size() > 0) {
// only allow one fetch session at a time so later requests can make use of cached results
synchronized (provider) {

View File

@ -192,6 +192,7 @@ class MovieHashMatcher implements AutoCompleteMatcher {
selectDialog.setTitle(movieFile.getPath());
selectDialog.getHeaderLabel().setText(String.format("Movies matching '%s':", stripReleaseInfo(getName(movieFile))));
selectDialog.getCancelAction().putValue(Action.NAME, "Ignore");
selectDialog.pack();
// show dialog
selectDialog.setLocation(getOffsetLocation(selectDialog.getOwner()));

View File

@ -26,6 +26,7 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;
@ -973,7 +974,7 @@ class SubtitleAutoMatchDialog extends JDialog {
}
// auto-detect query and search for subtitles
Collection<String> querySet = detectSeriesNames(files);
Collection<String> querySet = detectSeriesNames(files, Locale.ENGLISH);
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
// if auto-detection fails, ask user for input

View File

@ -19,23 +19,32 @@ public class ByteBufferInputStream extends InputStream {
@Override
public int read() throws IOException {
if (buffer.remaining() <= 0)
return -1;
return buffer.get();
return (buffer.position() < buffer.limit()) ? (buffer.get() & 0xff) : -1;
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
if (buffer.remaining() <= 0)
if (b == null) {
throw new NullPointerException();
} else if (off < 0 || len < 0 || len > b.length - off) {
throw new IndexOutOfBoundsException();
}
if (buffer.position() >= buffer.limit()) {
return -1;
}
int length = Math.min(len, buffer.remaining());
if (len > buffer.remaining()) {
len = buffer.remaining();
}
buffer.get(b, off, length);
if (len <= 0) {
return 0;
}
return length;
buffer.get(b, off, len);
return len;
}

BIN
website/data/movies.txt.gz Normal file

Binary file not shown.

View File

@ -1,10 +1,17 @@
PROPER
RETAIL
^(TV.)?(Show|Serie|Anime)[s]?$
^Movie[s]?$
^Video[s]?$
CD[1-3]
Demonoid
ExtraScene
ExtraTorrent
PROPER
Hard.Subbed
mkvonly
MVGroup.org
READNFO
REPACK
RETAIL
ShareReactor
ShareZONE
UsaBit.com

View File

@ -25,6 +25,7 @@ BAJSKORV
BamHD
Barba
BaSS
BAUM
BDiSC
BiA
BlueTV
@ -167,6 +168,7 @@ LMAO
LoD
LOL
LOLCATS
LTT
MAiN
MainEvent
MARiNES