From 0da24469b778a4e39b7254b7a21923da737e4086 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 9 Jan 2014 20:44:02 +0000 Subject: [PATCH] * fix anime matching --- source/net/sourceforge/filebot/Main.java | 10 ++++----- .../filebot/media/MediaDetection.java | 22 +++++++++---------- .../sourceforge/filebot/web/AnidbClient.java | 12 ++++++---- website/data/query-blacklist.txt | 1 + 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/source/net/sourceforge/filebot/Main.java b/source/net/sourceforge/filebot/Main.java index 357bf0c4..d9e449fa 100644 --- a/source/net/sourceforge/filebot/Main.java +++ b/source/net/sourceforge/filebot/Main.java @@ -409,15 +409,15 @@ public class Main { MediaDetection.getClutterFileFilter(); MediaDetection.getDiskFolderFilter(); + Collection empty = Collections.emptyList(); + MediaDetection.matchSeriesByDirectMapping(empty); + // pre-load movie/series index List dummy = Collections.singletonList(""); MediaDetection.stripReleaseInfo(dummy, true); - MediaDetection.matchSeriesByName(dummy, -1); + MediaDetection.matchSeriesByName(dummy, -1, MediaDetection.getSeriesIndex()); + MediaDetection.matchSeriesByName(dummy, -1, MediaDetection.getAnimeIndex()); MediaDetection.matchMovieName(dummy, true, -1); - - Collection empty = Collections.emptyList(); - MediaDetection.matchSeriesByDirectMapping(empty); - WebServices.TheTVDB.getLocalIndex(); } catch (Exception e) { Logger.getLogger(getClass().getName()).log(Level.WARNING, e.getMessage(), e); } diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index 095a0c13..540203d8 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -280,7 +280,7 @@ public class MediaDetection { return detectSeriesNames(files, locale, index); } - public static List detectSeriesNames(Collection files, Locale locale, List> seriesIndex) throws Exception { + public static List detectSeriesNames(Collection files, Locale locale, List> index) throws Exception { List names = new ArrayList(); // try xattr metadata if enabled @@ -327,12 +327,12 @@ public class MediaDetection { } // check foldernames first - List matches = matchSeriesByName(folders, 0); + List matches = matchSeriesByName(folders, 0, index); // check all filenames if necessary if (matches.isEmpty()) { - matches.addAll(matchSeriesByName(filenames, 0)); - matches.addAll(matchSeriesByName(stripReleaseInfo(filenames, false), 0)); + matches.addAll(matchSeriesByName(filenames, 0, index)); + matches.addAll(matchSeriesByName(stripReleaseInfo(filenames, false), 0, index)); } // use lenient sub sequence matching only as fallback and try name without spacing logic that may mess up any lookup @@ -348,13 +348,13 @@ public class MediaDetection { sns.set(i, sn); } } - for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(sns, false), true)) { + for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(sns, false), true, index)) { matches.add(it.getName()); } // less reliable CWS deep matching - matches.addAll(matchSeriesByName(folders, 2)); - matches.addAll(matchSeriesByName(filenames, 2)); + matches.addAll(matchSeriesByName(folders, 2, index)); + matches.addAll(matchSeriesByName(filenames, 2, index)); // pass along only valid terms names.addAll(stripBlacklistedTerms(matches)); @@ -454,7 +454,7 @@ public class MediaDetection { } } - public static List matchSeriesByName(Collection files, int maxStartIndex) throws Exception { + public static List matchSeriesByName(Collection files, int maxStartIndex, List> index) throws Exception { HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex); List matches = new ArrayList(); @@ -462,7 +462,7 @@ public class MediaDetection { for (CollationKey[] name : names) { IndexEntry bestMatch = null; - for (IndexEntry it : getSeriesIndex()) { + for (IndexEntry it : index) { CollationKey[] commonName = nameMatcher.matchFirstCommonSequence(name, it.getLenientKey()); if (commonName != null && commonName.length >= it.getLenientKey().length && (bestMatch == null || commonName.length > bestMatch.getLenientKey().length)) { bestMatch = it; @@ -485,7 +485,7 @@ public class MediaDetection { return matches; } - public static List matchSeriesFromStringWithoutSpacing(Collection names, boolean strict) throws IOException { + public static List matchSeriesFromStringWithoutSpacing(Collection names, boolean strict, List> index) throws IOException { // clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+"); @@ -502,7 +502,7 @@ public class MediaDetection { float similarityThreshold = strict ? 0.75f : 0.5f; List seriesList = new ArrayList(); - for (IndexEntry it : getSeriesIndex()) { + for (IndexEntry it : index) { String name = spacing.matcher(it.getLenientName()).replaceAll("").toLowerCase(); for (String term : terms) { if (term.contains(name)) { diff --git a/source/net/sourceforge/filebot/web/AnidbClient.java b/source/net/sourceforge/filebot/web/AnidbClient.java index aa65e686..e48540d5 100644 --- a/source/net/sourceforge/filebot/web/AnidbClient.java +++ b/source/net/sourceforge/filebot/web/AnidbClient.java @@ -180,6 +180,7 @@ public class AnidbClient extends AbstractEpisodeListProvider { typeOrder.add("1"); typeOrder.add("4"); typeOrder.add("2"); + typeOrder.add("3"); // fetch data Map> entriesByAnime = new HashMap>(65536); @@ -196,15 +197,18 @@ public class AnidbClient extends AbstractEpisodeListProvider { String title = matcher.group(4); if (aid > 0 && title.length() > 0 && typeOrder.contains(type) && languageOrder.contains(language)) { + // resolve HTML entities + title = Jsoup.parse(title).text(); + + if (type.equals("3") && (title.length() < 5 || !Character.isUpperCase(title.charAt(0)) || Character.isUpperCase(title.charAt(title.length() - 1)))) { + continue; + } + List names = entriesByAnime.get(aid); if (names == null) { names = new ArrayList(); entriesByAnime.put(aid, names); } - - // resolve HTML entities - title = Jsoup.parse(title).text(); - names.add(new Object[] { typeOrder.indexOf(type), languageOrder.indexOf(language), title }); } } diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index d17f9152..467df28b 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -111,6 +111,7 @@ ^test$ ^testdata$ ^tmp$ +^to$ ^Torrent$ ^Torrents$ ^Tracker$