* fix anime matching

This commit is contained in:
Reinhard Pointner 2014-01-09 20:44:02 +00:00
parent f466546788
commit 0da24469b7
4 changed files with 25 additions and 20 deletions

View File

@ -409,15 +409,15 @@ public class Main {
MediaDetection.getClutterFileFilter();
MediaDetection.getDiskFolderFilter();
Collection<File> empty = Collections.emptyList();
MediaDetection.matchSeriesByDirectMapping(empty);
// pre-load movie/series index
List<String> dummy = Collections.singletonList("");
MediaDetection.stripReleaseInfo(dummy, true);
MediaDetection.matchSeriesByName(dummy, -1);
MediaDetection.matchSeriesByName(dummy, -1, MediaDetection.getSeriesIndex());
MediaDetection.matchSeriesByName(dummy, -1, MediaDetection.getAnimeIndex());
MediaDetection.matchMovieName(dummy, true, -1);
Collection<File> empty = Collections.emptyList();
MediaDetection.matchSeriesByDirectMapping(empty);
WebServices.TheTVDB.getLocalIndex();
} catch (Exception e) {
Logger.getLogger(getClass().getName()).log(Level.WARNING, e.getMessage(), e);
}

View File

@ -280,7 +280,7 @@ public class MediaDetection {
return detectSeriesNames(files, locale, index);
}
public static List<String> detectSeriesNames(Collection<File> files, Locale locale, List<IndexEntry<SearchResult>> seriesIndex) throws Exception {
public static List<String> detectSeriesNames(Collection<File> files, Locale locale, List<IndexEntry<SearchResult>> index) throws Exception {
List<String> names = new ArrayList<String>();
// try xattr metadata if enabled
@ -327,12 +327,12 @@ public class MediaDetection {
}
// check foldernames first
List<String> matches = matchSeriesByName(folders, 0);
List<String> matches = matchSeriesByName(folders, 0, index);
// check all filenames if necessary
if (matches.isEmpty()) {
matches.addAll(matchSeriesByName(filenames, 0));
matches.addAll(matchSeriesByName(stripReleaseInfo(filenames, false), 0));
matches.addAll(matchSeriesByName(filenames, 0, index));
matches.addAll(matchSeriesByName(stripReleaseInfo(filenames, false), 0, index));
}
// use lenient subsequence matching only as a fallback, and try matching names with spacing removed, since spacing can break the lookup
@ -348,13 +348,13 @@ public class MediaDetection {
sns.set(i, sn);
}
}
for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(sns, false), true)) {
for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(sns, false), true, index)) {
matches.add(it.getName());
}
// less reliable CWS deep matching
matches.addAll(matchSeriesByName(folders, 2));
matches.addAll(matchSeriesByName(filenames, 2));
matches.addAll(matchSeriesByName(folders, 2, index));
matches.addAll(matchSeriesByName(filenames, 2, index));
// pass along only valid terms
names.addAll(stripBlacklistedTerms(matches));
@ -454,7 +454,7 @@ public class MediaDetection {
}
}
public static List<String> matchSeriesByName(Collection<String> files, int maxStartIndex) throws Exception {
public static List<String> matchSeriesByName(Collection<String> files, int maxStartIndex, List<IndexEntry<SearchResult>> index) throws Exception {
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
List<String> matches = new ArrayList<String>();
@ -462,7 +462,7 @@ public class MediaDetection {
for (CollationKey[] name : names) {
IndexEntry<SearchResult> bestMatch = null;
for (IndexEntry<SearchResult> it : getSeriesIndex()) {
for (IndexEntry<SearchResult> it : index) {
CollationKey[] commonName = nameMatcher.matchFirstCommonSequence(name, it.getLenientKey());
if (commonName != null && commonName.length >= it.getLenientKey().length && (bestMatch == null || commonName.length > bestMatch.getLenientKey().length)) {
bestMatch = it;
@ -485,7 +485,7 @@ public class MediaDetection {
return matches;
}
public static List<SearchResult> matchSeriesFromStringWithoutSpacing(Collection<String> names, boolean strict) throws IOException {
public static List<SearchResult> matchSeriesFromStringWithoutSpacing(Collection<String> names, boolean strict, List<IndexEntry<SearchResult>> index) throws IOException {
// strip punctuation, spacing, and a leading 'The' or 'A' from the name, since these are common causes of word-lookup failures
Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+");
@ -502,7 +502,7 @@ public class MediaDetection {
float similarityThreshold = strict ? 0.75f : 0.5f;
List<SearchResult> seriesList = new ArrayList<SearchResult>();
for (IndexEntry<SearchResult> it : getSeriesIndex()) {
for (IndexEntry<SearchResult> it : index) {
String name = spacing.matcher(it.getLenientName()).replaceAll("").toLowerCase();
for (String term : terms) {
if (term.contains(name)) {

View File

@ -180,6 +180,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
typeOrder.add("1");
typeOrder.add("4");
typeOrder.add("2");
typeOrder.add("3");
// fetch data
Map<Integer, List<Object[]>> entriesByAnime = new HashMap<Integer, List<Object[]>>(65536);
@ -196,15 +197,18 @@ public class AnidbClient extends AbstractEpisodeListProvider {
String title = matcher.group(4);
if (aid > 0 && title.length() > 0 && typeOrder.contains(type) && languageOrder.contains(language)) {
// resolve HTML entities
title = Jsoup.parse(title).text();
if (type.equals("3") && (title.length() < 5 || !Character.isUpperCase(title.charAt(0)) || Character.isUpperCase(title.charAt(title.length() - 1)))) {
continue;
}
List<Object[]> names = entriesByAnime.get(aid);
if (names == null) {
names = new ArrayList<Object[]>();
entriesByAnime.put(aid, names);
}
// resolve HTML entities
title = Jsoup.parse(title).text();
names.add(new Object[] { typeOrder.indexOf(type), languageOrder.indexOf(language), title });
}
}

View File

@ -111,6 +111,7 @@
^test$
^testdata$
^tmp$
^to$
^Torrent$
^Torrents$
^Tracker$