* fix anime matching

This commit is contained in:
Reinhard Pointner 2014-01-09 20:44:02 +00:00
parent f466546788
commit 0da24469b7
4 changed files with 25 additions and 20 deletions

View File

@ -409,15 +409,15 @@ public class Main {
MediaDetection.getClutterFileFilter(); MediaDetection.getClutterFileFilter();
MediaDetection.getDiskFolderFilter(); MediaDetection.getDiskFolderFilter();
Collection<File> empty = Collections.emptyList();
MediaDetection.matchSeriesByDirectMapping(empty);
// pre-load movie/series index // pre-load movie/series index
List<String> dummy = Collections.singletonList(""); List<String> dummy = Collections.singletonList("");
MediaDetection.stripReleaseInfo(dummy, true); MediaDetection.stripReleaseInfo(dummy, true);
MediaDetection.matchSeriesByName(dummy, -1); MediaDetection.matchSeriesByName(dummy, -1, MediaDetection.getSeriesIndex());
MediaDetection.matchSeriesByName(dummy, -1, MediaDetection.getAnimeIndex());
MediaDetection.matchMovieName(dummy, true, -1); MediaDetection.matchMovieName(dummy, true, -1);
Collection<File> empty = Collections.emptyList();
MediaDetection.matchSeriesByDirectMapping(empty);
WebServices.TheTVDB.getLocalIndex();
} catch (Exception e) { } catch (Exception e) {
Logger.getLogger(getClass().getName()).log(Level.WARNING, e.getMessage(), e); Logger.getLogger(getClass().getName()).log(Level.WARNING, e.getMessage(), e);
} }

View File

@ -280,7 +280,7 @@ public class MediaDetection {
return detectSeriesNames(files, locale, index); return detectSeriesNames(files, locale, index);
} }
public static List<String> detectSeriesNames(Collection<File> files, Locale locale, List<IndexEntry<SearchResult>> seriesIndex) throws Exception { public static List<String> detectSeriesNames(Collection<File> files, Locale locale, List<IndexEntry<SearchResult>> index) throws Exception {
List<String> names = new ArrayList<String>(); List<String> names = new ArrayList<String>();
// try xattr metadata if enabled // try xattr metadata if enabled
@ -327,12 +327,12 @@ public class MediaDetection {
} }
// check foldernames first // check foldernames first
List<String> matches = matchSeriesByName(folders, 0); List<String> matches = matchSeriesByName(folders, 0, index);
// check all filenames if necessary // check all filenames if necessary
if (matches.isEmpty()) { if (matches.isEmpty()) {
matches.addAll(matchSeriesByName(filenames, 0)); matches.addAll(matchSeriesByName(filenames, 0, index));
matches.addAll(matchSeriesByName(stripReleaseInfo(filenames, false), 0)); matches.addAll(matchSeriesByName(stripReleaseInfo(filenames, false), 0, index));
} }
// use lenient sub sequence matching only as fallback and try name without spacing logic that may mess up any lookup // use lenient sub sequence matching only as fallback and try name without spacing logic that may mess up any lookup
@ -348,13 +348,13 @@ public class MediaDetection {
sns.set(i, sn); sns.set(i, sn);
} }
} }
for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(sns, false), true)) { for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(sns, false), true, index)) {
matches.add(it.getName()); matches.add(it.getName());
} }
// less reliable CWS deep matching // less reliable CWS deep matching
matches.addAll(matchSeriesByName(folders, 2)); matches.addAll(matchSeriesByName(folders, 2, index));
matches.addAll(matchSeriesByName(filenames, 2)); matches.addAll(matchSeriesByName(filenames, 2, index));
// pass along only valid terms // pass along only valid terms
names.addAll(stripBlacklistedTerms(matches)); names.addAll(stripBlacklistedTerms(matches));
@ -454,7 +454,7 @@ public class MediaDetection {
} }
} }
public static List<String> matchSeriesByName(Collection<String> files, int maxStartIndex) throws Exception { public static List<String> matchSeriesByName(Collection<String> files, int maxStartIndex, List<IndexEntry<SearchResult>> index) throws Exception {
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex); HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
List<String> matches = new ArrayList<String>(); List<String> matches = new ArrayList<String>();
@ -462,7 +462,7 @@ public class MediaDetection {
for (CollationKey[] name : names) { for (CollationKey[] name : names) {
IndexEntry<SearchResult> bestMatch = null; IndexEntry<SearchResult> bestMatch = null;
for (IndexEntry<SearchResult> it : getSeriesIndex()) { for (IndexEntry<SearchResult> it : index) {
CollationKey[] commonName = nameMatcher.matchFirstCommonSequence(name, it.getLenientKey()); CollationKey[] commonName = nameMatcher.matchFirstCommonSequence(name, it.getLenientKey());
if (commonName != null && commonName.length >= it.getLenientKey().length && (bestMatch == null || commonName.length > bestMatch.getLenientKey().length)) { if (commonName != null && commonName.length >= it.getLenientKey().length && (bestMatch == null || commonName.length > bestMatch.getLenientKey().length)) {
bestMatch = it; bestMatch = it;
@ -485,7 +485,7 @@ public class MediaDetection {
return matches; return matches;
} }
public static List<SearchResult> matchSeriesFromStringWithoutSpacing(Collection<String> names, boolean strict) throws IOException { public static List<SearchResult> matchSeriesFromStringWithoutSpacing(Collection<String> names, boolean strict, List<IndexEntry<SearchResult>> index) throws IOException {
// clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail // clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail
Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+"); Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+");
@ -502,7 +502,7 @@ public class MediaDetection {
float similarityThreshold = strict ? 0.75f : 0.5f; float similarityThreshold = strict ? 0.75f : 0.5f;
List<SearchResult> seriesList = new ArrayList<SearchResult>(); List<SearchResult> seriesList = new ArrayList<SearchResult>();
for (IndexEntry<SearchResult> it : getSeriesIndex()) { for (IndexEntry<SearchResult> it : index) {
String name = spacing.matcher(it.getLenientName()).replaceAll("").toLowerCase(); String name = spacing.matcher(it.getLenientName()).replaceAll("").toLowerCase();
for (String term : terms) { for (String term : terms) {
if (term.contains(name)) { if (term.contains(name)) {

View File

@ -180,6 +180,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
typeOrder.add("1"); typeOrder.add("1");
typeOrder.add("4"); typeOrder.add("4");
typeOrder.add("2"); typeOrder.add("2");
typeOrder.add("3");
// fetch data // fetch data
Map<Integer, List<Object[]>> entriesByAnime = new HashMap<Integer, List<Object[]>>(65536); Map<Integer, List<Object[]>> entriesByAnime = new HashMap<Integer, List<Object[]>>(65536);
@ -196,15 +197,18 @@ public class AnidbClient extends AbstractEpisodeListProvider {
String title = matcher.group(4); String title = matcher.group(4);
if (aid > 0 && title.length() > 0 && typeOrder.contains(type) && languageOrder.contains(language)) { if (aid > 0 && title.length() > 0 && typeOrder.contains(type) && languageOrder.contains(language)) {
// resolve HTML entities
title = Jsoup.parse(title).text();
if (type.equals("3") && (title.length() < 5 || !Character.isUpperCase(title.charAt(0)) || Character.isUpperCase(title.charAt(title.length() - 1)))) {
continue;
}
List<Object[]> names = entriesByAnime.get(aid); List<Object[]> names = entriesByAnime.get(aid);
if (names == null) { if (names == null) {
names = new ArrayList<Object[]>(); names = new ArrayList<Object[]>();
entriesByAnime.put(aid, names); entriesByAnime.put(aid, names);
} }
// resolve HTML entities
title = Jsoup.parse(title).text();
names.add(new Object[] { typeOrder.indexOf(type), languageOrder.indexOf(language), title }); names.add(new Object[] { typeOrder.indexOf(type), languageOrder.indexOf(language), title });
} }
} }

View File

@ -111,6 +111,7 @@
^test$ ^test$
^testdata$ ^testdata$
^tmp$ ^tmp$
^to$
^Torrent$ ^Torrent$
^Torrents$ ^Torrents$
^Tracker$ ^Tracker$