diff --git a/source/net/sourceforge/filebot/web/AbstractEpisodeListProvider.java b/source/net/sourceforge/filebot/web/AbstractEpisodeListProvider.java index 7c548192..fadf8fcc 100644 --- a/source/net/sourceforge/filebot/web/AbstractEpisodeListProvider.java +++ b/source/net/sourceforge/filebot/web/AbstractEpisodeListProvider.java @@ -23,17 +23,22 @@ public abstract class AbstractEpisodeListProvider implements EpisodeListProvider public List search(String query) throws Exception { - return search(query, Locale.ENGLISH); + return search(query, getDefaultLocale()); } public List getEpisodeList(SearchResult searchResult) throws Exception { - return getEpisodeList(searchResult, Locale.ENGLISH); + return getEpisodeList(searchResult, getDefaultLocale()); } public List getEpisodeList(SearchResult searchResult, int season) throws Exception { - return getEpisodeList(searchResult, season, Locale.ENGLISH); + return getEpisodeList(searchResult, season, getDefaultLocale()); + } + + + public Locale getDefaultLocale() { + return Locale.ENGLISH; } diff --git a/source/net/sourceforge/filebot/web/AnidbClient.java b/source/net/sourceforge/filebot/web/AnidbClient.java index e96827d1..cb04dc72 100644 --- a/source/net/sourceforge/filebot/web/AnidbClient.java +++ b/source/net/sourceforge/filebot/web/AnidbClient.java @@ -10,19 +10,15 @@ import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Scanner; -import java.util.TreeMap; -import java.util.AbstractMap.SimpleEntry; +import java.util.Set; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -33,9 +29,6 @@ import javax.swing.Icon; import org.w3c.dom.Document; import org.w3c.dom.Node; -import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; -import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; - import net.sf.ehcache.Cache; import net.sf.ehcache.CacheManager; import net.sf.ehcache.Element; @@ -82,54 +75,16 @@ public class AnidbClient extends AbstractEpisodeListProvider { @Override - public List search(String query, Locale locale) throws Exception { - // normalize - query = query.toLowerCase(); - - AbstractStringMetric metric = new QGramsDistance(); - - final List> resultSet = new ArrayList>(); - - for (AnidbSearchResult anime : getAnimeTitles()) { - for (String name : new String[] { anime.getMainTitle(), anime.getEnglishTitle() }) { - if (name != null) { - // normalize - name = name.toLowerCase(); - float similarity = metric.getSimilarity(name, query); - - if (similarity > 0.5 || name.contains(query)) { - resultSet.add(new SimpleEntry(anime, similarity)); - - // add only once - break; - } - } - } - } - - // sort by similarity descending (best matches first) - Collections.sort(resultSet, new Comparator>() { + public List search(String query, final Locale locale) throws Exception { + LocalSearch index = new LocalSearch(getAnimeTitles()) { @Override - public int compare(Entry o1, Entry o2) { - return o2.getValue().compareTo(o1.getValue()); - } - }); - - // view for the first 20 search results - return new AbstractList() { - - @Override - public SearchResult get(int index) { - return resultSet.get(index).getKey(); - } - - - @Override - public int size() { - return Math.min(20, resultSet.size()); + protected Set getFields(AnidbSearchResult anime) { + return set(anime.getPrimaryTitle(), anime.getOfficialTitle("en"), anime.getOfficialTitle(locale.getLanguage())); } }; + + return new ArrayList(index.search(query)); } @@ -225,8 +180,8 @@ public class AnidbClient extends AbstractEpisodeListProvider { // type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language) Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$"); - Map primaryTitleMap = new TreeMap(); - Map englishTitleMap = new HashMap(); + Map primaryTitleMap = new HashMap(); + Map> officialTitleMap = new HashMap>(); // fetch data Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8"); @@ -236,10 +191,21 @@ public class AnidbClient extends AbstractEpisodeListProvider { Matcher matcher = pattern.matcher(scanner.nextLine()); if (matcher.matches()) { - if (matcher.group(2).equals("1")) { - primaryTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4)); - } else if (matcher.group(2).equals("4") && matcher.group(3).equals("en")) { - englishTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4)); + int aid = Integer.parseInt(matcher.group(1)); + String type = matcher.group(2); + String language = matcher.group(3); + String title = matcher.group(4); + + if (type.equals("1")) { + primaryTitleMap.put(aid, title); + } else if (type.equals("4")) { + Map languageTitleMap = officialTitleMap.get(aid); + if (languageTitleMap == null) { + languageTitleMap = new HashMap(); + officialTitleMap.put(aid, languageTitleMap); + } + + languageTitleMap.put(language, title); } } } @@ -247,11 +213,11 @@ public class AnidbClient extends AbstractEpisodeListProvider { scanner.close(); } - // build up a list of all possible anidb search results + // build up a list of all possible AniDB search results anime = new ArrayList(primaryTitleMap.size()); for (Entry entry : primaryTitleMap.entrySet()) { - anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitleMap.get(entry.getKey()))); + anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), officialTitleMap.get(entry.getKey()))); } // populate cache @@ -264,19 +230,19 @@ public class AnidbClient extends AbstractEpisodeListProvider { public static class AnidbSearchResult extends SearchResult implements Serializable { protected int aid; - protected String mainTitle; - protected String englishTitle; + protected String primaryTitle; // one per anime + protected Map officialTitle; // one per language + - protected AnidbSearchResult() { // used by serializer } - public AnidbSearchResult(int aid, String mainTitle, String englishTitle) { + public AnidbSearchResult(int aid, String primaryTitle, Map officialTitle) { this.aid = aid; - this.mainTitle = mainTitle; - this.englishTitle = englishTitle; + this.primaryTitle = primaryTitle; + this.officialTitle = officialTitle; } @@ -287,17 +253,17 @@ public class AnidbClient extends AbstractEpisodeListProvider { @Override public String getName() { - return mainTitle; + return primaryTitle; } - public String getMainTitle() { - return mainTitle; + public String getPrimaryTitle() { + return primaryTitle; } - public String getEnglishTitle() { - return englishTitle; + public String getOfficialTitle(String key) { + return officialTitle != null ? officialTitle.get(key) : null; } } diff --git a/source/net/sourceforge/filebot/web/LocalSearch.java b/source/net/sourceforge/filebot/web/LocalSearch.java new file mode 100644 index 00000000..1e39700b --- /dev/null +++ b/source/net/sourceforge/filebot/web/LocalSearch.java @@ -0,0 +1,129 @@ + +package net.sourceforge.filebot.web; + + +import static java.util.Collections.*; + +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.AbstractMap.SimpleEntry; +import java.util.Map.Entry; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; +import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; + + +class LocalSearch { + + private final AbstractStringMetric metric = new QGramsDistance(); + private final float resultMinimumSimilarity = 0.5f; + private final int resultSetSize = 20; + + private final List objects; + private final List> fields; + + + public LocalSearch(Collection data) { + objects = new ArrayList(data); + fields = new ArrayList>(objects.size()); + + for (int i = 0; i < objects.size(); i++) { + fields.add(i, getFields(objects.get(i))); + } + } + + + public List search(String query) throws ExecutionException, InterruptedException { + final String q = normalize(query); + List>> tasks = new ArrayList>>(objects.size()); + + for (int i = 0; i < objects.size(); i++) { + final int index = i; + tasks.add(new Callable>() { + + @Override + public Entry call() throws Exception { + float similarity = 0; + boolean match = false; + + for (String field : fields.get(index)) { + match |= field.contains(q); + similarity = Math.max(metric.getSimilarity(q, field), similarity); + } + + return match || similarity > resultMinimumSimilarity ? new SimpleEntry(objects.get(index), similarity) : null; + } + }); + } + + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + final List> resultSet = new ArrayList>(objects.size()); + + try { + for (Future> entry : executor.invokeAll(tasks)) { + if (entry.get() != null) { + resultSet.add(entry.get()); + } + } + } finally { + executor.shutdown(); + } + + // sort by similarity descending (best matches first) + sort(resultSet, new Comparator>() { + + @Override + public int compare(Entry o1, Entry o2) { + return o2.getValue().compareTo(o1.getValue()); + } + }); + + // view for the first 20 search results + return new AbstractList() { + + @Override + public T get(int index) { + return resultSet.get(index).getKey(); + } + + + @Override + public int size() { + return Math.min(resultSetSize, resultSet.size()); + } + }; + } + + + protected Set getFields(T object) { + return set(object.toString()); + } + + + protected Set set(String... values) { + Set set = new HashSet(values.length); + for (String value : values) { + if (value != null) { + set.add(normalize(value)); + } + } + return set; + } + + + protected String normalize(String value) { + // normalize separator, normalize case and trim + return value.replaceAll("[\\p{Punct}\\p{Space}]+", " ").trim().toLowerCase(); + } + +} diff --git a/source/net/sourceforge/filebot/web/SerienjunkiesClient.java b/source/net/sourceforge/filebot/web/SerienjunkiesClient.java index 3ef9aa21..24279762 100644 --- a/source/net/sourceforge/filebot/web/SerienjunkiesClient.java +++ b/source/net/sourceforge/filebot/web/SerienjunkiesClient.java @@ -10,16 +10,12 @@ import java.io.Reader; import java.io.Serializable; import java.net.URI; import java.net.URL; -import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Locale; -import java.util.AbstractMap.SimpleEntry; -import java.util.Map.Entry; +import java.util.Set; import javax.net.ssl.HttpsURLConnection; import javax.swing.Icon; @@ -28,9 +24,6 @@ import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.JSONValue; -import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; -import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; - import net.sf.ehcache.Cache; import net.sf.ehcache.CacheManager; import net.sf.ehcache.Element; @@ -63,55 +56,22 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider { @Override - public List search(String query, Locale locale) throws IOException { - // normalize - query = query.toLowerCase(); - - AbstractStringMetric metric = new QGramsDistance(); - - final List> resultSet = new ArrayList>(); - - for (SerienjunkiesSearchResult anime : getSeriesTitles()) { - for (String name : new String[] { anime.getMainTitle(), anime.getGermanTitle() }) { - if (name != null) { - // normalize - name = name.toLowerCase(); - - float similarity = metric.getSimilarity(name, query); - - if (similarity > 0.5 || name.contains(query)) { - resultSet.add(new SimpleEntry(anime, similarity)); - - // add only once - break; - } - } - } - } - - // sort by similarity descending (best matches first) - Collections.sort(resultSet, new Comparator>() { - - @Override - public int compare(Entry o1, Entry o2) { - return o2.getValue().compareTo(o1.getValue()); - } - }); - - // view for the first 20 search results - return new AbstractList() { - - @Override - public SearchResult get(int index) { - return resultSet.get(index).getKey(); - } - + public Locale getDefaultLocale() { + return Locale.GERMAN; + } + + @Override + public List search(String query, Locale locale) throws Exception { + LocalSearch index = new LocalSearch(getSeriesTitles()) { + @Override - public int size() { - return Math.min(20, resultSet.size()); + protected Set getFields(SerienjunkiesSearchResult series) { + return set(series.getMainTitle(), series.getGermanTitle()); } }; + + return new ArrayList(index.search(query)); } @@ -158,6 +118,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider { // fetch episode data episodes = new ArrayList(25); + String seriesName = locale.equals(Locale.GERMAN) && series.getGermanTitle() != null ? series.getGermanTitle() : series.getMainTitle(); JSONObject data = (JSONObject) request("/allepisodes.php?d=" + apikey + "&q=" + series.getSeriesId()); JSONArray list = (JSONArray) data.get("allepisodes"); @@ -169,7 +130,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider { String title = (String) obj.get("german"); Date airdate = Date.parse((String) ((JSONObject) obj.get("airdates")).get("premiere"), "yyyy-MM-dd"); - episodes.add(new Episode(series.getName(), series.getStartDate(), season, episode, title, i + 1, null, airdate)); + episodes.add(new Episode(seriesName, series.getStartDate(), season, episode, title, i + 1, null, airdate)); } // populate cache @@ -241,7 +202,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider { @Override public String getName() { - return germanTitle != null ? germanTitle : mainTitle; // prefer german title + return germanTitle != null ? germanTitle : mainTitle; // prefer German title } diff --git a/test/net/sourceforge/filebot/web/AnidbClientTest.java b/test/net/sourceforge/filebot/web/AnidbClientTest.java index 5c48196b..3f52be60 100644 --- a/test/net/sourceforge/filebot/web/AnidbClientTest.java +++ b/test/net/sourceforge/filebot/web/AnidbClientTest.java @@ -36,7 +36,7 @@ public class AnidbClientTest { @BeforeClass public static void setUpBeforeClass() throws Exception { monsterSearchResult = new AnidbSearchResult(1539, "Monster", null); - twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", "The Twelve Kingdoms"); + twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", null); princessTutuSearchResult = new AnidbSearchResult(516, "Princess Tutu", null); } @@ -49,7 +49,16 @@ public class AnidbClientTest { List results = anidb.search("one piece"); AnidbSearchResult result = (AnidbSearchResult) results.get(0); + assertEquals("One Piece", result.getName()); + assertEquals(69, result.getAnimeId()); + } + + + @Test + public void searchJapanese() throws Exception { + List results = anidb.search("ワンピース", Locale.JAPANESE); + AnidbSearchResult result = (AnidbSearchResult) results.get(0); assertEquals("One Piece", result.getName()); assertEquals(69, result.getAnimeId()); }