From 811f945daf9ee431faf6423b7528e9b4c7329d69 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Wed, 4 Jul 2012 10:20:52 +0000 Subject: [PATCH] * improved series detection for certain cases --- .../filebot/media/MediaDetection.java | 20 +++++++++++------- .../filebot/media/ReleaseInfo.java | 5 ++++- website/data/query-blacklist.txt | 4 +++- website/data/release-groups.txt | 21 +++++++++++-------- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index e174bc1b..329bd828 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -214,14 +214,20 @@ public class MediaDetection { } // check foldernames first - List matches = matchSeriesByName(folders); + List matches = matchSeriesByName(folders, 0); // check all filenames if necessary if (matches.isEmpty()) { - matches = matchSeriesByName(filenames); + matches = matchSeriesByName(filenames, 0); } - names.addAll(matches); + // use lenient sub sequence matching only as fallback + if (matches.size() > 0) { + names.addAll(matches); + } else { + names.addAll(matchSeriesByName(folders, 3)); + names.addAll(matchSeriesByName(filenames, 3)); + } } catch (Exception e) { Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to match folder structure: " + e.getMessage(), e); } @@ -244,8 +250,8 @@ public class MediaDetection { } - public static List matchSeriesByName(Collection names) throws Exception { - HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(0); + public static List matchSeriesByName(Collection names, int maxStartIndex) throws Exception { + HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex); List matches = new ArrayList(); String[] seriesIndex = releaseInfo.getSeriesList(); @@ -602,8 +608,8 @@ public class MediaDetection { private static final Map transformCache = synchronizedMap(new WeakHashMap(65536)); - public HighPerformanceMatcher(int commonWordSequenceMaxStartIndex) { - super(collator, commonWordSequenceMaxStartIndex); + public HighPerformanceMatcher(int maxStartIndex) { + super(collator, maxStartIndex); } diff --git a/source/net/sourceforge/filebot/media/ReleaseInfo.java b/source/net/sourceforge/filebot/media/ReleaseInfo.java index ce576e91..bb1d8256 100644 --- a/source/net/sourceforge/filebot/media/ReleaseInfo.java +++ b/source/net/sourceforge/filebot/media/ReleaseInfo.java @@ -146,7 +146,10 @@ public class ReleaseInfo { for (Pattern it : stopwords) { Matcher matcher = it.matcher(item); if (matcher.find()) { - item = item.substring(0, matcher.start()); // use substring before the matched stopword + String substring = item.substring(0, matcher.start()); // use substring before the matched stopword + if (normalizePunctuation(substring).length() >= 3) { + item = substring; // make sure that the substring has enough data + } } } return item; diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index a4daf721..05e5ea8b 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -14,6 +14,7 @@ [1-9].?of.?[1-9] ^(TV.)?(Show|Serie)[s]? ^[0-9]{1,2}[.] +^[lp][^\p{Alnum}] ^AUDIO_TS$ ^BDMV$ ^Cover @@ -21,7 +22,6 @@ ^Film[s]? ^HVDVD_TS$ ^Info -^l[^\p{Alnum}] ^Movie[s]? ^New$ ^Other$ @@ -34,6 +34,7 @@ ^VIDEO_TS$ A.Release.Lounge Anime[s]? +BBC By.Cool.Release CD[0]?[1-3] CN @@ -121,6 +122,7 @@ unrated unrated.edition UsaBit.com Video[s]? +www.speed.cd www.torentz.3xforum.ro www.Torrenting.com www[.] diff --git a/website/data/release-groups.txt b/website/data/release-groups.txt index 4daf1a77..a19f4efc 100644 --- a/website/data/release-groups.txt +++ b/website/data/release-groups.txt @@ -89,16 +89,16 @@ BTSD BTT BugZ BULLDOZER -Bunny +BUNNY BWB C4TV CAMELOT CBGB CDD CDDHD -Chakra +CHAKRA chaostheory -Chara +CHARA charliebartlett CHD CHDBits @@ -114,6 +114,7 @@ CiNEFiLE CiNEFOX CLASSiC CLDD +CLUE cntc COALiTiON Cocksure @@ -139,7 +140,6 @@ danger2u danirl DARM DASH -DATA DAW DCA DDC @@ -178,7 +178,7 @@ DNR DON DoNE DOT -doubt +DOUBT DOWN DRHD DUPLI @@ -235,7 +235,7 @@ FilmHD FiNaLe fjall FLAiTE -Flaket +FLAKET fLAMEhd FLAWL3SS Flomp-Rumbel @@ -251,7 +251,7 @@ FQM FRAGMENT FraMeSToR FRIGGHD -Frost +FROST FSiHD Ft4U FTVDT @@ -259,7 +259,7 @@ FTW-FM FTW-HD fty FUCT -Funner +FUNNER FXG FxM G3N3 @@ -268,7 +268,6 @@ Gazdi GB GECKOS GEHENNA -george.c GFW GFY GiNJi @@ -557,7 +556,9 @@ RTA RUBY RuDE RUDOS +RUNNER RUSTLE +RWD Ryugan S26 SAiMORNY @@ -660,6 +661,7 @@ TRUEFRENCH tRuEHD TsH tsn +TURKiSO TUSAHD TVA TW @@ -700,6 +702,7 @@ w4f WAF WANKAZ WASTE +WAT WAVEY WHATELSE WHiiZz