* supercharge movie auto-detection using a local movie index and use it for common-word-sequence matching
* use a language-specific Collator wherever movie names / file names are matched, so we get perfect matching even with accents, umlauts, half-width/full-width characters, etc.
This commit is contained in:
parent
6707a94518
commit
90cc0a06fa
|
@ -98,7 +98,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||
int cws = 0; // common word sequence
|
||||
double max = mediaFiles.size();
|
||||
|
||||
SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
|
||||
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(getLenientCollator(locale));
|
||||
Collection<String> cwsList = emptySet();
|
||||
if (max >= 5) {
|
||||
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
|
||||
|
@ -137,7 +137,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
|
||||
|
||||
// auto-determine optimal batch sets
|
||||
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles).entrySet()) {
|
||||
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles, locale).entrySet()) {
|
||||
List<List<File>> batchSets = new ArrayList<List<File>>();
|
||||
|
||||
if (sameSeriesGroup.getValue() != null && sameSeriesGroup.getValue().size() > 0) {
|
||||
|
@ -150,7 +150,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||
|
||||
for (List<File> batch : batchSets) {
|
||||
// auto-detect series name if not given
|
||||
Collection<String> seriesNames = (query == null) ? detectQuery(batch, strict) : singleton(query);
|
||||
Collection<String> seriesNames = (query == null) ? detectQuery(batch, locale, strict) : singleton(query);
|
||||
|
||||
// fetch episode data
|
||||
Set<Episode> episodes = fetchEpisodeSet(db, seriesNames, locale, strict);
|
||||
|
@ -297,6 +297,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||
|
||||
// unknown hash, try via imdb id from nfo file
|
||||
if (movie == null) {
|
||||
CLILogger.fine(format("Auto-detect movie from context: [%s]", movieFiles[i]));
|
||||
Collection<Movie> results = detectMovie(movieFiles[i], null, service, locale, strict);
|
||||
movie = (Movie) selectSearchResult(query, results, strict).get(0);
|
||||
|
||||
|
@ -463,7 +464,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||
// lookup subtitles via text search, only perform hash lookup in strict mode
|
||||
if ((query != null || !strict) && !collector.isComplete()) {
|
||||
// auto-detect search query
|
||||
Collection<String> querySet = (query == null) ? detectQuery(filter(files, VIDEO_FILES), false) : singleton(query);
|
||||
Collection<String> querySet = (query == null) ? detectQuery(filter(files, VIDEO_FILES), language.toLocale(), false) : singleton(query);
|
||||
|
||||
for (SubtitleProvider service : WebServices.getSubtitleProviders()) {
|
||||
if (collector.isComplete()) {
|
||||
|
@ -618,9 +619,9 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||
}
|
||||
|
||||
|
||||
private List<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception {
|
||||
private List<String> detectQuery(Collection<File> mediaFiles, Locale locale, boolean strict) throws Exception {
|
||||
// detect series name by common word sequence
|
||||
List<String> names = detectSeriesNames(mediaFiles);
|
||||
List<String> names = detectSeriesNames(mediaFiles, locale);
|
||||
|
||||
if (names.isEmpty() || (strict && names.size() > 1)) {
|
||||
throw new Exception("Unable to auto-select query: " + names);
|
||||
|
|
|
@ -10,6 +10,7 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
|
@ -24,6 +25,9 @@ import java.util.Map.Entry;
|
|||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -42,15 +46,18 @@ import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
|
|||
|
||||
public class MediaDetection {
|
||||
|
||||
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files) throws Exception {
|
||||
private static ReleaseInfo releaseInfo = new ReleaseInfo();
|
||||
|
||||
|
||||
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
|
||||
SortedMap<File, List<File>> filesByFolder = mapByFolder(filter(files, VIDEO_FILES, SUBTITLE_FILES));
|
||||
|
||||
// map series names by folder
|
||||
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
|
||||
|
||||
for (Entry<File, List<File>> it : filesByFolder.entrySet()) {
|
||||
Set<String> namesForFolder = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||
namesForFolder.addAll(detectSeriesNames(it.getValue()));
|
||||
Set<String> namesForFolder = new TreeSet<String>(getLenientCollator(locale));
|
||||
namesForFolder.addAll(detectSeriesNames(it.getValue(), locale));
|
||||
|
||||
seriesNamesByFolder.put(it.getKey(), namesForFolder);
|
||||
}
|
||||
|
@ -74,7 +81,7 @@ public class MediaDetection {
|
|||
Map<Set<File>, Set<String>> batchSets = new HashMap<Set<File>, Set<String>>();
|
||||
|
||||
while (seriesNamesByFolder.size() > 0) {
|
||||
Set<String> combinedNameSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||
Set<String> combinedNameSet = new TreeSet<String>(getLenientCollator(locale));
|
||||
Set<File> combinedFolderSet = new HashSet<File>();
|
||||
|
||||
// build combined match set
|
||||
|
@ -116,12 +123,12 @@ public class MediaDetection {
|
|||
}
|
||||
|
||||
|
||||
public static List<String> detectSeriesNames(Collection<File> files) throws Exception {
|
||||
public static List<String> detectSeriesNames(Collection<File> files, Locale locale) throws Exception {
|
||||
// don't allow duplicates
|
||||
Map<String, String> names = new LinkedHashMap<String, String>();
|
||||
|
||||
try {
|
||||
for (SearchResult it : lookupSeriesNameByInfoFile(files, Locale.ENGLISH)) {
|
||||
for (SearchResult it : lookupSeriesNameByInfoFile(files, locale)) {
|
||||
names.put(it.getName().toLowerCase(), it.getName());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
@ -129,10 +136,10 @@ public class MediaDetection {
|
|||
}
|
||||
|
||||
// match common word sequence and clean detected word sequence from unwanted elements
|
||||
Collection<String> matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
|
||||
Collection<String> matches = new SeriesNameMatcher(getLenientCollator(locale)).matchAll(files.toArray(new File[files.size()]));
|
||||
|
||||
try {
|
||||
matches = stripReleaseInfo(matches);
|
||||
matches = stripReleaseInfo(matches, true);
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to clean matches: " + e.getMessage(), e);
|
||||
}
|
||||
|
@ -148,6 +155,7 @@ public class MediaDetection {
|
|||
public static Collection<Movie> detectMovie(File movieFile, MovieIdentificationService hashLookupService, MovieIdentificationService queryLookupService, Locale locale, boolean strict) throws Exception {
|
||||
Set<Movie> options = new LinkedHashSet<Movie>();
|
||||
|
||||
// lookup by file hash
|
||||
if (hashLookupService != null) {
|
||||
for (Movie movie : hashLookupService.getMovieDescriptors(new File[] { movieFile }, locale)) {
|
||||
if (movie != null) {
|
||||
|
@ -156,58 +164,128 @@ public class MediaDetection {
|
|||
}
|
||||
}
|
||||
|
||||
// lookup by id from nfo file
|
||||
if (queryLookupService != null) {
|
||||
// try to grep imdb id from nfo files
|
||||
for (int imdbid : grepImdbIdFor(movieFile)) {
|
||||
Movie movie = queryLookupService.getMovieDescriptor(imdbid, locale);
|
||||
|
||||
if (movie != null) {
|
||||
options.add(movie);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (queryLookupService != null && !strict && options.isEmpty()) {
|
||||
// search by file name or folder name
|
||||
Collection<String> searchQueries = new LinkedHashSet<String>();
|
||||
searchQueries.add(getName(movieFile));
|
||||
searchQueries.add(getName(movieFile.getParentFile()));
|
||||
|
||||
// remove blacklisted terms
|
||||
searchQueries = stripReleaseInfo(searchQueries);
|
||||
|
||||
final SimilarityMetric metric = new NameSimilarityMetric();
|
||||
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
|
||||
for (String query : searchQueries) {
|
||||
for (Movie movie : queryLookupService.searchMovie(query, locale)) {
|
||||
probabilityMap.put(movie, metric.getSimilarity(query, movie));
|
||||
}
|
||||
}
|
||||
|
||||
// sort by similarity to original query (descending)
|
||||
List<Movie> results = new ArrayList<Movie>(probabilityMap.keySet());
|
||||
sort(results, new Comparator<Movie>() {
|
||||
|
||||
@Override
|
||||
public int compare(Movie a, Movie b) {
|
||||
return probabilityMap.get(b).compareTo(probabilityMap.get(a));
|
||||
}
|
||||
});
|
||||
|
||||
options.addAll(results);
|
||||
// search by file name or folder name
|
||||
List<String> files = new ArrayList<String>();
|
||||
files.add(getName(movieFile));
|
||||
files.add(getName(movieFile.getParentFile()));
|
||||
|
||||
long t = System.currentTimeMillis();
|
||||
List<Movie> movieNameMatches = matchMovieName(files, locale, strict);
|
||||
System.out.println(System.currentTimeMillis() - t);
|
||||
|
||||
// skip further queries if collected matches are already sufficient
|
||||
if (options.size() > 0 && movieNameMatches.size() > 0) {
|
||||
options.addAll(movieNameMatches);
|
||||
return options;
|
||||
}
|
||||
|
||||
// continue gathering more matches if possible
|
||||
options.addAll(movieNameMatches);
|
||||
|
||||
// query by file / folder name
|
||||
if (queryLookupService != null && !strict) {
|
||||
options.addAll(queryMovieByFileName(files, queryLookupService, locale));
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
|
||||
public static String stripReleaseInfo(String name) throws IOException {
|
||||
return new ReleaseInfo().cleanRelease(name);
|
||||
private static List<Movie> matchMovieName(final List<String> files, final Locale locale, final boolean strict) throws Exception {
|
||||
// cross-reference file / folder name with movie list
|
||||
final SeriesNameMatcher nameMatcher = new SeriesNameMatcher(String.CASE_INSENSITIVE_ORDER); // use simple comparator for speed (2-3x faster)
|
||||
|
||||
final Map<Movie, String> matchMap = synchronizedMap(new HashMap<Movie, String>());
|
||||
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
||||
|
||||
for (final Movie movie : releaseInfo.getMovieList()) {
|
||||
executor.submit(new Runnable() {
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
for (String name : files) {
|
||||
String movieIdentifier = movie.getName();
|
||||
String commonName = nameMatcher.matchByFirstCommonWordSequence(name, movieIdentifier);
|
||||
if (commonName != null && commonName.length() >= movieIdentifier.length()) {
|
||||
String strictMovieIdentifier = movie.getName() + " " + movie.getYear();
|
||||
String strictCommonName = nameMatcher.matchByFirstCommonWordSequence(name, strictMovieIdentifier);
|
||||
if (strictCommonName != null && strictCommonName.length() >= strictMovieIdentifier.length()) {
|
||||
// prefer strict match
|
||||
matchMap.put(movie, strictCommonName);
|
||||
} else if (!strict) {
|
||||
// make sure the common identifier is not just the year
|
||||
matchMap.put(movie, commonName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// wait for last task to finish
|
||||
executor.shutdown();
|
||||
executor.awaitTermination(1, TimeUnit.MINUTES);
|
||||
|
||||
// sort by length of name match (descending)
|
||||
List<Movie> results = new ArrayList<Movie>(matchMap.keySet());
|
||||
sort(results, new Comparator<Movie>() {
|
||||
|
||||
@Override
|
||||
public int compare(Movie a, Movie b) {
|
||||
return Integer.compare(matchMap.get(b).length(), matchMap.get(a).length());
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
public static List<String> stripReleaseInfo(Collection<String> names) throws IOException {
|
||||
return new ReleaseInfo().cleanRelease(names);
|
||||
private static Collection<Movie> queryMovieByFileName(List<String> files, MovieIdentificationService queryLookupService, Locale locale) throws Exception {
|
||||
// remove blacklisted terms
|
||||
Set<String> querySet = new LinkedHashSet<String>();
|
||||
querySet.addAll(stripReleaseInfo(files, true));
|
||||
querySet.addAll(stripReleaseInfo(files, false));
|
||||
|
||||
final SimilarityMetric metric = new NameSimilarityMetric();
|
||||
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
|
||||
for (String query : querySet) {
|
||||
for (Movie movie : queryLookupService.searchMovie(query, locale)) {
|
||||
probabilityMap.put(movie, metric.getSimilarity(query, movie));
|
||||
}
|
||||
}
|
||||
|
||||
// sort by similarity to original query (descending)
|
||||
List<Movie> results = new ArrayList<Movie>(probabilityMap.keySet());
|
||||
sort(results, new Comparator<Movie>() {
|
||||
|
||||
@Override
|
||||
public int compare(Movie a, Movie b) {
|
||||
return probabilityMap.get(b).compareTo(probabilityMap.get(a));
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
public static String stripReleaseInfo(String name) throws IOException {
|
||||
return releaseInfo.cleanRelease(name, true);
|
||||
}
|
||||
|
||||
|
||||
public static List<String> stripReleaseInfo(Collection<String> names, boolean strict) throws IOException {
|
||||
return releaseInfo.cleanRelease(names, strict);
|
||||
}
|
||||
|
||||
|
||||
|
@ -284,4 +362,13 @@ public class MediaDetection {
|
|||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public static Comparator<String> getLenientCollator(Locale locale) {
|
||||
// use maximum strength collator by default
|
||||
final Collator collator = Collator.getInstance(locale);
|
||||
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
||||
collator.setStrength(Collator.TERTIARY);
|
||||
|
||||
return (Comparator) collator;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ package net.sourceforge.filebot.media;
|
|||
|
||||
import static java.util.ResourceBundle.*;
|
||||
import static java.util.regex.Pattern.*;
|
||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||
import static net.sourceforge.tuned.StringUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -11,33 +12,40 @@ import java.io.IOException;
|
|||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import net.sourceforge.filebot.web.CachedResource;
|
||||
import net.sourceforge.filebot.web.Movie;
|
||||
import net.sourceforge.tuned.ByteBufferInputStream;
|
||||
|
||||
|
||||
public class ReleaseInfo {
|
||||
|
||||
public String getVideoSource(File file) {
|
||||
// check parent and itself for group names
|
||||
return matchLast(getVideoSourcePattern(), file.getParent(), file.getName());
|
||||
return matchLast(getVideoSourcePattern(), getBundle(getClass().getName()).getString("pattern.video.source").split("[|]"), file.getParent(), file.getName());
|
||||
}
|
||||
|
||||
|
||||
public String getReleaseGroup(File file) throws IOException {
|
||||
// check parent and itself for group names
|
||||
return matchLast(getReleaseGroupPattern(), file.getParent(), file.getName());
|
||||
return matchLast(getReleaseGroupPattern(false), releaseGroupResource.get(), file.getParent(), file.getName());
|
||||
}
|
||||
|
||||
|
||||
protected String matchLast(Pattern pattern, CharSequence... sequence) {
|
||||
protected String matchLast(Pattern pattern, String[] standardValues, CharSequence... sequence) {
|
||||
String lastMatch = null;
|
||||
|
||||
// match last occurrence
|
||||
for (CharSequence name : sequence) {
|
||||
if (name == null)
|
||||
continue;
|
||||
|
@ -48,24 +56,36 @@ public class ReleaseInfo {
|
|||
}
|
||||
}
|
||||
|
||||
// prefer standard value over matched value
|
||||
if (lastMatch != null) {
|
||||
for (String standard : standardValues) {
|
||||
if (standard.equalsIgnoreCase(lastMatch)) {
|
||||
return standard;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return lastMatch;
|
||||
}
|
||||
|
||||
|
||||
public List<String> cleanRelease(Iterable<String> items) throws IOException {
|
||||
return clean(items, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern());
|
||||
public List<String> cleanRelease(Iterable<String> items, boolean strict) throws IOException {
|
||||
return clean(items, getReleaseGroupPattern(strict), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern(false));
|
||||
}
|
||||
|
||||
|
||||
public String cleanRelease(String item) throws IOException {
|
||||
return clean(item, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern());
|
||||
public String cleanRelease(String item, boolean strict) throws IOException {
|
||||
return clean(item, getReleaseGroupPattern(strict), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern(false));
|
||||
}
|
||||
|
||||
|
||||
public List<String> clean(Iterable<String> items, Pattern... blacklisted) {
|
||||
List<String> cleanedItems = new ArrayList<String>();
|
||||
for (String it : items) {
|
||||
cleanedItems.add(clean(it, blacklisted));
|
||||
String cleanedItem = clean(it, blacklisted);
|
||||
if (cleanedItem.length() > 0) {
|
||||
cleanedItems.add(cleanedItem);
|
||||
}
|
||||
}
|
||||
|
||||
return cleanedItems;
|
||||
|
@ -77,7 +97,7 @@ public class ReleaseInfo {
|
|||
item = it.matcher(item).replaceAll("");
|
||||
}
|
||||
|
||||
return item.replaceAll("[\\p{Punct}\\p{Space}]+", " ").trim();
|
||||
return normalizePunctuation(item);
|
||||
}
|
||||
|
||||
|
||||
|
@ -88,14 +108,16 @@ public class ReleaseInfo {
|
|||
Locale locale = new Locale(code);
|
||||
tokens.add(locale.getLanguage());
|
||||
tokens.add(locale.getISO3Language());
|
||||
tokens.add(locale.getDisplayLanguage(Locale.ENGLISH));
|
||||
for (Locale language : new HashSet<Locale>(Arrays.asList(Locale.ENGLISH, Locale.getDefault()))) {
|
||||
tokens.add(locale.getDisplayLanguage(language));
|
||||
}
|
||||
}
|
||||
|
||||
// remove illegal tokens
|
||||
tokens.remove("");
|
||||
|
||||
// .{language}[.srt]
|
||||
return compile("(?<=[.])(" + join(tokens, "|") + ")(?=$)", CASE_INSENSITIVE);
|
||||
return compile("(?<=\\p{Punct})(" + join(tokens, "|") + ")(?=$)", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
||||
}
|
||||
|
||||
|
||||
|
@ -119,21 +141,27 @@ public class ReleaseInfo {
|
|||
}
|
||||
|
||||
|
||||
public Pattern getReleaseGroupPattern() throws IOException {
|
||||
public synchronized Pattern getReleaseGroupPattern(boolean strict) throws IOException {
|
||||
// pattern matching any release group name enclosed in separators
|
||||
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
|
||||
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
||||
}
|
||||
|
||||
|
||||
public Pattern getBlacklistPattern() throws IOException {
|
||||
public synchronized Pattern getBlacklistPattern(boolean strict) throws IOException {
|
||||
// pattern matching any release group name enclosed in separators
|
||||
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
|
||||
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
||||
}
|
||||
|
||||
|
||||
public synchronized Movie[] getMovieList() throws IOException {
|
||||
return movieListResource.get();
|
||||
}
|
||||
|
||||
|
||||
// fetch release group names online and try to update the data every other day
|
||||
protected final PatternResource releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
|
||||
protected final PatternResource queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
||||
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
|
||||
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
||||
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
||||
|
||||
|
||||
protected static class PatternResource extends CachedResource<String[]> {
|
||||
|
@ -149,4 +177,28 @@ public class ReleaseInfo {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
protected static class MovieResource extends CachedResource<Movie[]> {
|
||||
|
||||
public MovieResource(String resource) {
|
||||
super(resource, Movie[].class, 24 * 60 * 60 * 1000); // 24h update interval
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Movie[] process(ByteBuffer data) throws IOException {
|
||||
Scanner scanner = new Scanner(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n");
|
||||
|
||||
List<Movie> movies = new ArrayList<Movie>();
|
||||
while (scanner.hasNext()) {
|
||||
int imdbid = scanner.nextInt();
|
||||
String name = scanner.next();
|
||||
int year = scanner.nextInt();
|
||||
movies.add(new Movie(name, year, imdbid));
|
||||
}
|
||||
|
||||
return movies.toArray(new Movie[0]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,10 +2,13 @@
|
|||
pattern.video.source: CAMRip|CAM|TS|TELESYNC|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R5|R5LINE|DVDRip|DVDR|TVRip|DSR|PDTV|HDTV|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR|WorkPrint|VHS|VCD
|
||||
|
||||
# additional release info patterns
|
||||
pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|ac3|2ch|6ch|ws|hr|720p|1080p
|
||||
pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|ac3|2ch|6ch|WS|HR|720p|1080p
|
||||
|
||||
# group names mostly copied from [http://scenelingo.wordpress.com/list-of-scene-release-groups]
|
||||
url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt
|
||||
|
||||
# blacklisted terms that will be ignored
|
||||
url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt
|
||||
|
||||
# list of all movies (id, name, year)
|
||||
url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz
|
||||
|
|
|
@ -3,6 +3,8 @@ package net.sourceforge.filebot.similarity;
|
|||
|
||||
|
||||
import static java.util.Collections.*;
|
||||
import static java.util.regex.Pattern.*;
|
||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||
import static net.sourceforge.tuned.StringUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -28,10 +30,21 @@ import net.sourceforge.tuned.FileUtilities;
|
|||
|
||||
public class SeriesNameMatcher {
|
||||
|
||||
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
|
||||
protected final NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
||||
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
|
||||
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
||||
|
||||
protected final int commonWordSequenceMaxStartIndex = 3;
|
||||
protected int commonWordSequenceMaxStartIndex = 3;
|
||||
protected Comparator<String> commonWordComparator;
|
||||
|
||||
|
||||
/**
 * Creates a matcher that compares words with {@code String.CASE_INSENSITIVE_ORDER}.
 */
public SeriesNameMatcher() {
|
||||
this(String.CASE_INSENSITIVE_ORDER);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a matcher that uses the given comparator as its common word comparator.
 *
 * @param comparator comparator stored in {@code commonWordComparator}
 */
public SeriesNameMatcher(Comparator<String> comparator) {
|
||||
this.commonWordComparator = comparator;
|
||||
}
|
||||
|
||||
|
||||
public Collection<String> matchAll(File[] files) {
|
||||
|
@ -75,7 +88,7 @@ public class SeriesNameMatcher {
|
|||
whitelist.addAll(deepMatchAll(focus, threshold));
|
||||
|
||||
// 1. use pattern matching
|
||||
seriesNames.addAll(flatMatchAll(names, Pattern.compile(join(whitelist, "|"), Pattern.CASE_INSENSITIVE), threshold, false));
|
||||
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ), threshold, false));
|
||||
|
||||
// 2. use common word sequences
|
||||
seriesNames.addAll(whitelist);
|
||||
|
@ -92,7 +105,7 @@ public class SeriesNameMatcher {
|
|||
* threshold
|
||||
*/
|
||||
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
|
||||
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, String.CASE_INSENSITIVE_ORDER);
|
||||
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, commonWordComparator);
|
||||
|
||||
for (String name : names) {
|
||||
// use normalized name
|
||||
|
@ -191,7 +204,7 @@ public class SeriesNameMatcher {
|
|||
common = words;
|
||||
} else {
|
||||
// find common sequence
|
||||
common = firstCommonSequence(common, words, commonWordSequenceMaxStartIndex, String.CASE_INSENSITIVE_ORDER);
|
||||
common = firstCommonSequence(common, words, commonWordSequenceMaxStartIndex, commonWordComparator);
|
||||
|
||||
if (common == null) {
|
||||
// no common sequence
|
||||
|
@ -209,14 +222,12 @@ public class SeriesNameMatcher {
|
|||
|
||||
protected String normalize(String name) {
|
||||
// remove group names and checksums, any [...] or (...)
|
||||
name = name.replaceAll("\\([^\\(]*\\)", " ");
|
||||
name = name.replaceAll("\\[[^\\[]*\\]", " ");
|
||||
name = normalizeBrackets(name);
|
||||
|
||||
// remove/normalize special characters
|
||||
name = name.replaceAll("['`´]+", "");
|
||||
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
||||
name = normalizePunctuation(name);
|
||||
|
||||
return name.trim();
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -173,7 +173,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
|||
List<Callable<List<Match<File, ?>>>> taskPerFolder = new ArrayList<Callable<List<Match<File, ?>>>>();
|
||||
|
||||
// detect series names and create episode list fetch tasks
|
||||
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles).entrySet()) {
|
||||
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles, locale).entrySet()) {
|
||||
List<List<File>> batchSets = new ArrayList<List<File>>();
|
||||
|
||||
if (sameSeriesGroup.getValue() != null && sameSeriesGroup.getValue().size() > 0) {
|
||||
|
@ -219,7 +219,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
|||
|
||||
// detect series name and fetch episode list
|
||||
if (autodetection) {
|
||||
Collection<String> names = detectSeriesNames(files);
|
||||
Collection<String> names = detectSeriesNames(files, locale);
|
||||
if (names.size() > 0) {
|
||||
// only allow one fetch session at a time so later requests can make use of cached results
|
||||
synchronized (provider) {
|
||||
|
|
|
@ -192,6 +192,7 @@ class MovieHashMatcher implements AutoCompleteMatcher {
|
|||
selectDialog.setTitle(movieFile.getPath());
|
||||
selectDialog.getHeaderLabel().setText(String.format("Movies matching '%s':", stripReleaseInfo(getName(movieFile))));
|
||||
selectDialog.getCancelAction().putValue(Action.NAME, "Ignore");
|
||||
selectDialog.pack();
|
||||
|
||||
// show dialog
|
||||
selectDialog.setLocation(getOffsetLocation(selectDialog.getOwner()));
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.Collection;
|
|||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.TreeSet;
|
||||
|
@ -973,7 +974,7 @@ class SubtitleAutoMatchDialog extends JDialog {
|
|||
}
|
||||
|
||||
// auto-detect query and search for subtitles
|
||||
Collection<String> querySet = detectSeriesNames(files);
|
||||
Collection<String> querySet = detectSeriesNames(files, Locale.ENGLISH);
|
||||
List<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, languageName);
|
||||
|
||||
// if auto-detection fails, ask user for input
|
||||
|
|
|
@ -11,52 +11,61 @@ public class ByteBufferInputStream extends InputStream {
|
|||
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
|
||||
|
||||
/**
 * Creates an input stream backed by the given buffer.
 * The buffer reference is stored as is, no copy is made.
 *
 * @param buffer the buffer to read from
 */
public ByteBufferInputStream(ByteBuffer buffer) {
|
||||
this.buffer = buffer;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
if (buffer.remaining() <= 0)
|
||||
return -1;
|
||||
|
||||
return buffer.get();
|
||||
return (buffer.position() < buffer.limit()) ? (buffer.get() & 0xff) : -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public int read(byte[] b, int off, int len) throws IOException {
|
||||
if (buffer.remaining() <= 0)
|
||||
if (b == null) {
|
||||
throw new NullPointerException();
|
||||
} else if (off < 0 || len < 0 || len > b.length - off) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
}
|
||||
|
||||
if (buffer.position() >= buffer.limit()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int length = Math.min(len, buffer.remaining());
|
||||
if (len > buffer.remaining()) {
|
||||
len = buffer.remaining();
|
||||
}
|
||||
|
||||
buffer.get(b, off, length);
|
||||
if (len <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return length;
|
||||
buffer.get(b, off, len);
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Returns the number of bytes remaining in the underlying buffer.
 */
@Override
|
||||
public int available() throws IOException {
|
||||
return buffer.remaining();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Always returns {@code true}.
 */
@Override
|
||||
public boolean markSupported() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Marks the current position of the underlying buffer.
 *
 * @param readlimit ignored
 */
@Override
|
||||
public void mark(int readlimit) {
|
||||
buffer.mark();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
buffer.reset();
|
||||
|
|
Binary file not shown.
|
@ -1,10 +1,17 @@
|
|||
PROPER
|
||||
RETAIL
|
||||
^(TV.)?(Show|Serie|Anime)[s]?$
|
||||
^Movie[s]?$
|
||||
^Video[s]?$
|
||||
CD[1-3]
|
||||
Demonoid
|
||||
ExtraScene
|
||||
ExtraTorrent
|
||||
PROPER
|
||||
Hard.Subbed
|
||||
mkvonly
|
||||
MVGroup.org
|
||||
READNFO
|
||||
REPACK
|
||||
RETAIL
|
||||
ShareReactor
|
||||
ShareZONE
|
||||
UsaBit.com
|
||||
|
|
|
@ -25,6 +25,7 @@ BAJSKORV
|
|||
BamHD
|
||||
Barba
|
||||
BaSS
|
||||
BAUM
|
||||
BDiSC
|
||||
BiA
|
||||
BlueTV
|
||||
|
@ -167,6 +168,7 @@ LMAO
|
|||
LoD
|
||||
LOL
|
||||
LOLCATS
|
||||
LTT
|
||||
MAiN
|
||||
MainEvent
|
||||
MARiNES
|
||||
|
|
Loading…
Reference in New Issue