* switch to using an online exclude pattern list that can be updated anytime for everybody
This commit is contained in:
parent
e3fde5f139
commit
3f9c0ab67e
|
@ -16,6 +16,7 @@ def sortRegexList(path) {
|
||||||
// sort and check shared regex collections
|
// sort and check shared regex collections
|
||||||
sortRegexList("website/data/release-groups.txt")
|
sortRegexList("website/data/release-groups.txt")
|
||||||
sortRegexList("website/data/query-blacklist.txt")
|
sortRegexList("website/data/query-blacklist.txt")
|
||||||
|
sortRegexList("website/data/exclude-blacklist.txt")
|
||||||
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------- //
|
// ------------------------------------------------------------------------- //
|
||||||
|
|
|
@ -38,9 +38,6 @@ import javax.swing.SwingUtilities;
|
||||||
import javax.swing.UIManager;
|
import javax.swing.UIManager;
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
|
||||||
import org.kohsuke.args4j.CmdLineException;
|
|
||||||
import org.w3c.dom.NodeList;
|
|
||||||
|
|
||||||
import net.miginfocom.swing.MigLayout;
|
import net.miginfocom.swing.MigLayout;
|
||||||
import net.sf.ehcache.CacheManager;
|
import net.sf.ehcache.CacheManager;
|
||||||
import net.sourceforge.filebot.cli.ArgumentBean;
|
import net.sourceforge.filebot.cli.ArgumentBean;
|
||||||
|
@ -56,6 +53,9 @@ import net.sourceforge.filebot.web.CachedResource;
|
||||||
import net.sourceforge.tuned.ByteBufferInputStream;
|
import net.sourceforge.tuned.ByteBufferInputStream;
|
||||||
import net.sourceforge.tuned.PreferencesMap.PreferencesEntry;
|
import net.sourceforge.tuned.PreferencesMap.PreferencesEntry;
|
||||||
|
|
||||||
|
import org.kohsuke.args4j.CmdLineException;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
|
||||||
|
|
||||||
public class Main {
|
public class Main {
|
||||||
|
|
||||||
|
@ -264,6 +264,10 @@ public class Main {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
try {
|
||||||
|
// pre-load filter data
|
||||||
|
MediaDetection.getClutterFileFilter();
|
||||||
|
MediaDetection.getDiskFolderFilter();
|
||||||
|
|
||||||
// pre-load movie/series index
|
// pre-load movie/series index
|
||||||
List<String> dummy = Collections.singletonList("");
|
List<String> dummy = Collections.singletonList("");
|
||||||
MediaDetection.stripReleaseInfo(dummy, true);
|
MediaDetection.stripReleaseInfo(dummy, true);
|
||||||
|
|
|
@ -292,7 +292,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
||||||
CLILogger.config(format("Rename movies using [%s]", service.getName()));
|
CLILogger.config(format("Rename movies using [%s]", service.getName()));
|
||||||
|
|
||||||
// ignore sample files
|
// ignore sample files
|
||||||
List<File> fileset = filter(files, NON_CLUTTER_FILES);
|
List<File> fileset = filter(files, not(getClutterFileFilter()));
|
||||||
|
|
||||||
// handle movie files
|
// handle movie files
|
||||||
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));
|
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));
|
||||||
|
|
|
@ -61,17 +61,33 @@ public class MediaDetection {
|
||||||
|
|
||||||
public static final ReleaseInfo releaseInfo = new ReleaseInfo();
|
public static final ReleaseInfo releaseInfo = new ReleaseInfo();
|
||||||
|
|
||||||
public static final FileFilter DISK_FOLDERS = releaseInfo.getDiskFolderFilter();
|
private static FileFilter diskFolder;
|
||||||
public static final FileFilter NON_CLUTTER_FILES = not(releaseInfo.getClutterFileFilter());
|
private static FileFilter clutterFile;
|
||||||
|
|
||||||
|
|
||||||
public static boolean isDiskFolder(File folder) {
|
public static FileFilter getDiskFolderFilter() {
|
||||||
return DISK_FOLDERS.accept(folder);
|
if (diskFolder == null) {
|
||||||
|
diskFolder = releaseInfo.getDiskFolderFilter();
|
||||||
|
}
|
||||||
|
return diskFolder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static boolean isNonClutter(File file) {
|
public static FileFilter getClutterFileFilter() throws IOException {
|
||||||
return NON_CLUTTER_FILES.accept(file);
|
if (clutterFile == null) {
|
||||||
|
clutterFile = releaseInfo.getClutterFileFilter();
|
||||||
|
}
|
||||||
|
return clutterFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static boolean isDiskFolder(File folder) {
|
||||||
|
return getDiskFolderFilter().accept(folder);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static boolean isClutterFile(File file) throws IOException {
|
||||||
|
return getClutterFileFilter().accept(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.text.Normalizer.Form;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
@ -102,10 +103,8 @@ public class ReleaseInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
// cached patterns
|
// cached patterns
|
||||||
private Pattern[] strict_stopwords;
|
private final Map<Boolean, Pattern[]> stopwords = new HashMap<Boolean, Pattern[]>(2);
|
||||||
private Pattern[] strict_blacklist;
|
private final Map<Boolean, Pattern[]> blacklist = new HashMap<Boolean, Pattern[]>(2);
|
||||||
private Pattern[] nonstrict_stopwords;
|
|
||||||
private Pattern[] nonstrict_blacklist;
|
|
||||||
|
|
||||||
|
|
||||||
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
|
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
|
||||||
|
@ -113,9 +112,9 @@ public class ReleaseInfo {
|
||||||
Pattern[] blacklist;
|
Pattern[] blacklist;
|
||||||
|
|
||||||
// initialize cached patterns
|
// initialize cached patterns
|
||||||
synchronized (this) {
|
synchronized (this.stopwords) {
|
||||||
stopwords = strict ? strict_stopwords : nonstrict_stopwords;
|
stopwords = this.stopwords.get(strict);
|
||||||
blacklist = strict ? strict_blacklist : nonstrict_blacklist;
|
blacklist = this.blacklist.get(strict);
|
||||||
|
|
||||||
if (stopwords == null || blacklist == null) {
|
if (stopwords == null || blacklist == null) {
|
||||||
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
||||||
|
@ -132,13 +131,8 @@ public class ReleaseInfo {
|
||||||
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
|
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
|
||||||
|
|
||||||
// cache compiled patterns for common usage
|
// cache compiled patterns for common usage
|
||||||
if (strict) {
|
this.stopwords.put(strict, stopwords);
|
||||||
strict_stopwords = stopwords;
|
this.blacklist.put(strict, blacklist);
|
||||||
strict_blacklist = blacklist;
|
|
||||||
} else {
|
|
||||||
nonstrict_stopwords = stopwords;
|
|
||||||
nonstrict_blacklist = blacklist;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,13 +176,13 @@ public class ReleaseInfo {
|
||||||
|
|
||||||
public Pattern getLanguageTagPattern(Collection<String> languages) {
|
public Pattern getLanguageTagPattern(Collection<String> languages) {
|
||||||
// [en]
|
// [en]
|
||||||
return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Pattern getLanguageSuffixPattern(Collection<String> languages) {
|
public Pattern getLanguageSuffixPattern(Collection<String> languages) {
|
||||||
// .en.srt
|
// .en.srt
|
||||||
return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -221,13 +215,19 @@ public class ReleaseInfo {
|
||||||
|
|
||||||
public Pattern getReleaseGroupPattern(boolean strict) throws IOException {
|
public Pattern getReleaseGroupPattern(boolean strict) throws IOException {
|
||||||
// pattern matching any release group name enclosed in separators
|
// pattern matching any release group name enclosed in separators
|
||||||
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Pattern getBlacklistPattern() throws IOException {
|
public Pattern getBlacklistPattern() throws IOException {
|
||||||
// pattern matching any blacklisted query term enclosed in separators
|
// pattern matching any blacklisted query term enclosed in separators
|
||||||
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Pattern getExcludePattern() throws IOException {
|
||||||
|
// pattern matching any clutter file name (e.g. sample, trailer, extras) listed in the online exclude blacklist
|
||||||
|
return compile(join(excludeBlacklistResource.get(), "|"), CASE_INSENSITIVE | UNICODE_CASE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -246,13 +246,14 @@ public class ReleaseInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public FileFilter getClutterFileFilter() {
|
public FileFilter getClutterFileFilter() throws IOException {
|
||||||
return new FileFolderNameFilter(compile(getBundle(getClass().getName()).getString("pattern.file.ignore")));
|
return new FileFolderNameFilter(getExcludePattern());
|
||||||
}
|
}
|
||||||
|
|
||||||
// fetch release group names online and try to update the data every other day
|
// fetch release group names online and try to update the data every other day
|
||||||
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
|
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
|
||||||
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
||||||
|
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
|
||||||
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
||||||
protected final CachedResource<String[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list"));
|
protected final CachedResource<String[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list"));
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,9 @@ url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt
|
||||||
# blacklisted terms that will be ignored
|
# blacklisted terms that will be ignored
|
||||||
url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt
|
url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt
|
||||||
|
|
||||||
|
# clutter files that will be ignored
|
||||||
|
url.exclude-blacklist: http://filebot.sourceforge.net/data/exclude-blacklist.txt
|
||||||
|
|
||||||
# list of all movies (id, name, year)
|
# list of all movies (id, name, year)
|
||||||
url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz
|
url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz
|
||||||
|
|
||||||
|
@ -18,4 +21,3 @@ url.series-list: http://filebot.sourceforge.net/data/series.list.gz
|
||||||
|
|
||||||
# disk folder matcher
|
# disk folder matcher
|
||||||
pattern.diskfolder.entry: ^BDMV$|^HVDVD_TS$|^VIDEO_TS$|^AUDIO_TS$|^VCD$
|
pattern.diskfolder.entry: ^BDMV$|^HVDVD_TS$|^VIDEO_TS$|^AUDIO_TS$|^VCD$
|
||||||
pattern.file.ignore: (?<!\\p{Alnum})(?i:sample|trailer|extras|deleted.scenes)(?!\\p{Alnum})
|
|
||||||
|
|
|
@ -102,7 +102,7 @@ public class SeriesNameMatcher {
|
||||||
whitelist.addAll(deepMatchAll(focus, threshold));
|
whitelist.addAll(deepMatchAll(focus, threshold));
|
||||||
|
|
||||||
// 1. use pattern matching
|
// 1. use pattern matching
|
||||||
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ), threshold, false));
|
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE), threshold, false));
|
||||||
|
|
||||||
// 2. use common word sequences
|
// 2. use common word sequences
|
||||||
seriesNames.addAll(whitelist);
|
seriesNames.addAll(whitelist);
|
||||||
|
|
|
@ -70,7 +70,7 @@ class MovieHashMatcher implements AutoCompleteMatcher {
|
||||||
@Override
|
@Override
|
||||||
public List<Match<File, ?>> match(final List<File> files, final SortOrder sortOrder, final Locale locale, final boolean autodetect, final Component parent) throws Exception {
|
public List<Match<File, ?>> match(final List<File> files, final SortOrder sortOrder, final Locale locale, final boolean autodetect, final Component parent) throws Exception {
|
||||||
// ignore sample files
|
// ignore sample files
|
||||||
List<File> fileset = filter(files, NON_CLUTTER_FILES);
|
List<File> fileset = filter(files, not(getClutterFileFilter()));
|
||||||
|
|
||||||
// handle movie files
|
// handle movie files
|
||||||
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));
|
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));
|
||||||
|
|
|
@ -190,7 +190,6 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
servicePanel.add(component);
|
servicePanel.add(component);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// remember last user input
|
// remember last user input
|
||||||
private List<String> userQuery = new ArrayList<String>();
|
private List<String> userQuery = new ArrayList<String>();
|
||||||
|
|
||||||
|
@ -263,7 +262,6 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private final Action downloadAction = new AbstractAction("Download", ResourceManager.getIcon("dialog.continue")) {
|
private final Action downloadAction = new AbstractAction("Download", ResourceManager.getIcon("dialog.continue")) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -644,7 +642,6 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private final PropertyChangeListener selectedOptionListener = new PropertyChangeListener() {
|
private final PropertyChangeListener selectedOptionListener = new PropertyChangeListener() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -969,7 +966,7 @@ class SubtitleAutoMatchDialog extends JDialog {
|
||||||
@Override
|
@Override
|
||||||
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
|
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
|
||||||
// ignore clutter files from processing
|
// ignore clutter files from processing
|
||||||
files = filter(files, NON_CLUTTER_FILES);
|
files = filter(files, not(getClutterFileFilter()));
|
||||||
|
|
||||||
// auto-detect query and search for subtitles
|
// auto-detect query and search for subtitles
|
||||||
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
!(sample|trailer)
|
||||||
|
(?<!\p{Alnum})(extras|deleted.scenes)(?!\p{Alnum})
|
||||||
|
(sample|trailer)-
|
||||||
|
(sample|trailer)[.]
|
||||||
|
-(sample|trailer)
|
||||||
|
\((sample|trailer)\)
|
||||||
|
\[(sample|trailer)\]
|
||||||
|
^(sample|trailer)
|
Loading…
Reference in New Issue