* improved series detection for certain cases

This commit is contained in:
Reinhard Pointner 2012-07-04 10:20:52 +00:00
parent 5145d906dd
commit 811f945daf
4 changed files with 32 additions and 18 deletions

View File

@ -214,14 +214,20 @@ public class MediaDetection {
}
// check foldernames first
List<String> matches = matchSeriesByName(folders);
List<String> matches = matchSeriesByName(folders, 0);
// check all filenames if necessary
if (matches.isEmpty()) {
matches = matchSeriesByName(filenames);
matches = matchSeriesByName(filenames, 0);
}
// use lenient sub sequence matching only as fallback
if (matches.size() > 0) {
names.addAll(matches);
} else {
names.addAll(matchSeriesByName(folders, 3));
names.addAll(matchSeriesByName(filenames, 3));
}
} catch (Exception e) {
Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to match folder structure: " + e.getMessage(), e);
}
@ -244,8 +250,8 @@ public class MediaDetection {
}
public static List<String> matchSeriesByName(Collection<String> names) throws Exception {
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(0);
public static List<String> matchSeriesByName(Collection<String> names, int maxStartIndex) throws Exception {
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
List<String> matches = new ArrayList<String>();
String[] seriesIndex = releaseInfo.getSeriesList();
@ -602,8 +608,8 @@ public class MediaDetection {
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new WeakHashMap<String, CollationKey[]>(65536));
public HighPerformanceMatcher(int commonWordSequenceMaxStartIndex) {
super(collator, commonWordSequenceMaxStartIndex);
public HighPerformanceMatcher(int maxStartIndex) {
super(collator, maxStartIndex);
}

View File

@ -146,7 +146,10 @@ public class ReleaseInfo {
for (Pattern it : stopwords) {
Matcher matcher = it.matcher(item);
if (matcher.find()) {
item = item.substring(0, matcher.start()); // use substring before the matched stopword
String substring = item.substring(0, matcher.start()); // use substring before the matched stopword
if (normalizePunctuation(substring).length() >= 3) {
item = substring; // make sure that the substring has enough data
}
}
}
return item;

View File

@ -14,6 +14,7 @@
[1-9].?of.?[1-9]
^(TV.)?(Show|Serie)[s]?
^[0-9]{1,2}[.]
^[lp][^\p{Alnum}]
^AUDIO_TS$
^BDMV$
^Cover
@ -21,7 +22,6 @@
^Film[s]?
^HVDVD_TS$
^Info
^l[^\p{Alnum}]
^Movie[s]?
^New$
^Other$
@ -34,6 +34,7 @@
^VIDEO_TS$
A.Release.Lounge
Anime[s]?
BBC
By.Cool.Release
CD[0]?[1-3]
CN
@ -121,6 +122,7 @@ unrated
unrated.edition
UsaBit.com
Video[s]?
www.speed.cd
www.torentz.3xforum.ro
www.Torrenting.com
www[.]

View File

@ -89,16 +89,16 @@ BTSD
BTT
BugZ
BULLDOZER
Bunny
BUNNY
BWB
C4TV
CAMELOT
CBGB
CDD
CDDHD
Chakra
CHAKRA
chaostheory
Chara
CHARA
charliebartlett
CHD
CHDBits
@ -114,6 +114,7 @@ CiNEFiLE
CiNEFOX
CLASSiC
CLDD
CLUE
cntc
COALiTiON
Cocksure
@ -139,7 +140,6 @@ danger2u
danirl
DARM
DASH
DATA
DAW
DCA
DDC
@ -178,7 +178,7 @@ DNR
DON
DoNE
DOT
doubt
DOUBT
DOWN
DRHD
DUPLI
@ -235,7 +235,7 @@ FilmHD
FiNaLe
fjall
FLAiTE
Flaket
FLAKET
fLAMEhd
FLAWL3SS
Flomp-Rumbel
@ -251,7 +251,7 @@ FQM
FRAGMENT
FraMeSToR
FRIGGHD
Frost
FROST
FSiHD
Ft4U
FTVDT
@ -259,7 +259,7 @@ FTW-FM
FTW-HD
fty
FUCT
Funner
FUNNER
FXG
FxM
G3N3
@ -268,7 +268,6 @@ Gazdi
GB
GECKOS
GEHENNA
george.c
GFW
GFY
GiNJi
@ -557,7 +556,9 @@ RTA
RUBY
RuDE
RUDOS
RUNNER
RUSTLE
RWD
Ryugan
S26
SAiMORNY
@ -660,6 +661,7 @@ TRUEFRENCH
tRuEHD
TsH
tsn
TURKiSO
TUSAHD
TVA
TW
@ -700,6 +702,7 @@ w4f
WAF
WANKAZ
WASTE
WAT
WAVEY
WHATELSE
WHiiZz