* switch to an online exclude pattern list that can be updated at any time for all users

This commit is contained in:
Reinhard Pointner 2012-07-26 08:45:15 +00:00
parent e3fde5f139
commit 3f9c0ab67e
10 changed files with 66 additions and 37 deletions

View File

@ -16,6 +16,7 @@ def sortRegexList(path) {
// sort and check shared regex collections // sort and check shared regex collections
sortRegexList("website/data/release-groups.txt") sortRegexList("website/data/release-groups.txt")
sortRegexList("website/data/query-blacklist.txt") sortRegexList("website/data/query-blacklist.txt")
sortRegexList("website/data/exclude-blacklist.txt")
// ------------------------------------------------------------------------- // // ------------------------------------------------------------------------- //

View File

@ -38,9 +38,6 @@ import javax.swing.SwingUtilities;
import javax.swing.UIManager; import javax.swing.UIManager;
import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilderFactory;
import org.kohsuke.args4j.CmdLineException;
import org.w3c.dom.NodeList;
import net.miginfocom.swing.MigLayout; import net.miginfocom.swing.MigLayout;
import net.sf.ehcache.CacheManager; import net.sf.ehcache.CacheManager;
import net.sourceforge.filebot.cli.ArgumentBean; import net.sourceforge.filebot.cli.ArgumentBean;
@ -56,6 +53,9 @@ import net.sourceforge.filebot.web.CachedResource;
import net.sourceforge.tuned.ByteBufferInputStream; import net.sourceforge.tuned.ByteBufferInputStream;
import net.sourceforge.tuned.PreferencesMap.PreferencesEntry; import net.sourceforge.tuned.PreferencesMap.PreferencesEntry;
import org.kohsuke.args4j.CmdLineException;
import org.w3c.dom.NodeList;
public class Main { public class Main {
@ -264,6 +264,10 @@ public class Main {
@Override @Override
public void run() { public void run() {
try { try {
// pre-load filter data
MediaDetection.getClutterFileFilter();
MediaDetection.getDiskFolderFilter();
// pre-load movie/series index // pre-load movie/series index
List<String> dummy = Collections.singletonList(""); List<String> dummy = Collections.singletonList("");
MediaDetection.stripReleaseInfo(dummy, true); MediaDetection.stripReleaseInfo(dummy, true);

View File

@ -292,7 +292,7 @@ public class CmdlineOperations implements CmdlineInterface {
CLILogger.config(format("Rename movies using [%s]", service.getName())); CLILogger.config(format("Rename movies using [%s]", service.getName()));
// ignore sample files // ignore sample files
List<File> fileset = filter(files, NON_CLUTTER_FILES); List<File> fileset = filter(files, not(getClutterFileFilter()));
// handle movie files // handle movie files
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES)); Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));

View File

@ -61,17 +61,33 @@ public class MediaDetection {
public static final ReleaseInfo releaseInfo = new ReleaseInfo(); public static final ReleaseInfo releaseInfo = new ReleaseInfo();
public static final FileFilter DISK_FOLDERS = releaseInfo.getDiskFolderFilter(); private static FileFilter diskFolder;
public static final FileFilter NON_CLUTTER_FILES = not(releaseInfo.getClutterFileFilter()); private static FileFilter clutterFile;
public static boolean isDiskFolder(File folder) { public static FileFilter getDiskFolderFilter() {
return DISK_FOLDERS.accept(folder); if (diskFolder == null) {
diskFolder = releaseInfo.getDiskFolderFilter();
}
return diskFolder;
} }
public static boolean isNonClutter(File file) { public static FileFilter getClutterFileFilter() throws IOException {
return NON_CLUTTER_FILES.accept(file); if (clutterFile == null) {
clutterFile = releaseInfo.getClutterFileFilter();
}
return clutterFile;
}
public static boolean isDiskFolder(File folder) {
return getDiskFolderFilter().accept(folder);
}
public static boolean isClutterFile(File file) throws IOException {
return getClutterFileFilter().accept(file);
} }

View File

@ -22,6 +22,7 @@ import java.text.Normalizer.Form;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
@ -102,10 +103,8 @@ public class ReleaseInfo {
} }
// cached patterns // cached patterns
private Pattern[] strict_stopwords; private final Map<Boolean, Pattern[]> stopwords = new HashMap<Boolean, Pattern[]>(2);
private Pattern[] strict_blacklist; private final Map<Boolean, Pattern[]> blacklist = new HashMap<Boolean, Pattern[]>(2);
private Pattern[] nonstrict_stopwords;
private Pattern[] nonstrict_blacklist;
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException { public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
@ -113,9 +112,9 @@ public class ReleaseInfo {
Pattern[] blacklist; Pattern[] blacklist;
// initialize cached patterns // initialize cached patterns
synchronized (this) { synchronized (this.stopwords) {
stopwords = strict ? strict_stopwords : nonstrict_stopwords; stopwords = this.stopwords.get(strict);
blacklist = strict ? strict_blacklist : nonstrict_blacklist; blacklist = this.blacklist.get(strict);
if (stopwords == null || blacklist == null) { if (stopwords == null || blacklist == null) {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet(); Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
@ -132,13 +131,8 @@ public class ReleaseInfo {
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist }; blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
// cache compiled patterns for common usage // cache compiled patterns for common usage
if (strict) { this.stopwords.put(strict, stopwords);
strict_stopwords = stopwords; this.blacklist.put(strict, blacklist);
strict_blacklist = blacklist;
} else {
nonstrict_stopwords = stopwords;
nonstrict_blacklist = blacklist;
}
} }
} }
@ -182,13 +176,13 @@ public class ReleaseInfo {
public Pattern getLanguageTagPattern(Collection<String> languages) { public Pattern getLanguageTagPattern(Collection<String> languages) {
// [en] // [en]
return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ); return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE);
} }
public Pattern getLanguageSuffixPattern(Collection<String> languages) { public Pattern getLanguageSuffixPattern(Collection<String> languages) {
// .en.srt // .en.srt
return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ); return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE);
} }
@ -221,13 +215,19 @@ public class ReleaseInfo {
public Pattern getReleaseGroupPattern(boolean strict) throws IOException { public Pattern getReleaseGroupPattern(boolean strict) throws IOException {
// pattern matching any release group name enclosed in separators // pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ); return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE);
} }
public Pattern getBlacklistPattern() throws IOException { public Pattern getBlacklistPattern() throws IOException {
// pattern matching any blacklisted query term enclosed in separators // pattern matching any blacklisted query term enclosed in separators
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ); return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE);
}
public Pattern getExcludePattern() throws IOException {
// pattern matching any entry of the exclude blacklist (clutter files that will be ignored)
return compile(join(excludeBlacklistResource.get(), "|"), CASE_INSENSITIVE | UNICODE_CASE);
} }
@ -246,13 +246,14 @@ public class ReleaseInfo {
} }
public FileFilter getClutterFileFilter() { public FileFilter getClutterFileFilter() throws IOException {
return new FileFolderNameFilter(compile(getBundle(getClass().getName()).getString("pattern.file.ignore"))); return new FileFolderNameFilter(getExcludePattern());
} }
// fetch release group names online and try to update the data every other day // fetch release group names online and try to update the data every other day
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups")); protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist")); protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list")); protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
protected final CachedResource<String[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list")); protected final CachedResource<String[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list"));

View File

@ -10,6 +10,9 @@ url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt
# blacklisted terms that will be ignored # blacklisted terms that will be ignored
url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt
# clutter files that will be ignored
url.exclude-blacklist: http://filebot.sourceforge.net/data/exclude-blacklist.txt
# list of all movies (id, name, year) # list of all movies (id, name, year)
url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz
@ -18,4 +21,3 @@ url.series-list: http://filebot.sourceforge.net/data/series.list.gz
# disk folder matcher # disk folder matcher
pattern.diskfolder.entry: ^BDMV$|^HVDVD_TS$|^VIDEO_TS$|^AUDIO_TS$|^VCD$ pattern.diskfolder.entry: ^BDMV$|^HVDVD_TS$|^VIDEO_TS$|^AUDIO_TS$|^VCD$
pattern.file.ignore: (?<!\\p{Alnum})(?i:sample|trailer|extras|deleted.scenes)(?!\\p{Alnum})

View File

@ -102,7 +102,7 @@ public class SeriesNameMatcher {
whitelist.addAll(deepMatchAll(focus, threshold)); whitelist.addAll(deepMatchAll(focus, threshold));
// 1. use pattern matching // 1. use pattern matching
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ), threshold, false)); seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE), threshold, false));
// 2. use common word sequences // 2. use common word sequences
seriesNames.addAll(whitelist); seriesNames.addAll(whitelist);

View File

@ -70,7 +70,7 @@ class MovieHashMatcher implements AutoCompleteMatcher {
@Override @Override
public List<Match<File, ?>> match(final List<File> files, final SortOrder sortOrder, final Locale locale, final boolean autodetect, final Component parent) throws Exception { public List<Match<File, ?>> match(final List<File> files, final SortOrder sortOrder, final Locale locale, final boolean autodetect, final Component parent) throws Exception {
// ignore sample files // ignore sample files
List<File> fileset = filter(files, NON_CLUTTER_FILES); List<File> fileset = filter(files, not(getClutterFileFilter()));
// handle movie files // handle movie files
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES)); Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));

View File

@ -190,7 +190,6 @@ class SubtitleAutoMatchDialog extends JDialog {
servicePanel.add(component); servicePanel.add(component);
} }
// remember last user input // remember last user input
private List<String> userQuery = new ArrayList<String>(); private List<String> userQuery = new ArrayList<String>();
@ -263,7 +262,6 @@ class SubtitleAutoMatchDialog extends JDialog {
return null; return null;
} }
private final Action downloadAction = new AbstractAction("Download", ResourceManager.getIcon("dialog.continue")) { private final Action downloadAction = new AbstractAction("Download", ResourceManager.getIcon("dialog.continue")) {
@Override @Override
@ -644,7 +642,6 @@ class SubtitleAutoMatchDialog extends JDialog {
} }
} }
private final PropertyChangeListener selectedOptionListener = new PropertyChangeListener() { private final PropertyChangeListener selectedOptionListener = new PropertyChangeListener() {
@Override @Override
@ -969,7 +966,7 @@ class SubtitleAutoMatchDialog extends JDialog {
@Override @Override
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception { protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
// ignore clutter files from processing // ignore clutter files from processing
files = filter(files, NON_CLUTTER_FILES); files = filter(files, not(getClutterFileFilter()));
// auto-detect query and search for subtitles // auto-detect query and search for subtitles
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER); Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);

View File

@ -0,0 +1,8 @@
!(sample|trailer)
(?<!\p{Alnum})(extras|deleted.scenes)(?!\p{Alnum})
(sample|trailer)-
(sample|trailer)[.]
-(sample|trailer)
\((sample|trailer)\)
\[(sample|trailer)\]
^(sample|trailer)