* extract local search into its own class
* AniDB: search by any language * SJ: use the German series name if the language is set to GERMAN
This commit is contained in:
parent
15b90ebf73
commit
5184e4d98d
@ -23,17 +23,22 @@ public abstract class AbstractEpisodeListProvider implements EpisodeListProvider
|
||||
|
||||
|
||||
public List<SearchResult> search(String query) throws Exception {
|
||||
return search(query, Locale.ENGLISH);
|
||||
return search(query, getDefaultLocale());
|
||||
}
|
||||
|
||||
|
||||
public List<Episode> getEpisodeList(SearchResult searchResult) throws Exception {
|
||||
return getEpisodeList(searchResult, Locale.ENGLISH);
|
||||
return getEpisodeList(searchResult, getDefaultLocale());
|
||||
}
|
||||
|
||||
|
||||
public List<Episode> getEpisodeList(SearchResult searchResult, int season) throws Exception {
|
||||
return getEpisodeList(searchResult, season, Locale.ENGLISH);
|
||||
return getEpisodeList(searchResult, season, getDefaultLocale());
|
||||
}
|
||||
|
||||
|
||||
public Locale getDefaultLocale() {
|
||||
return Locale.ENGLISH;
|
||||
}
|
||||
|
||||
|
||||
|
@ -10,19 +10,15 @@ import java.io.Serializable;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.AbstractList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.TreeMap;
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Set;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@ -33,9 +29,6 @@ import javax.swing.Icon;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
|
||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
||||
|
||||
import net.sf.ehcache.Cache;
|
||||
import net.sf.ehcache.CacheManager;
|
||||
import net.sf.ehcache.Element;
|
||||
@ -82,54 +75,16 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
|
||||
|
||||
@Override
|
||||
public List<SearchResult> search(String query, Locale locale) throws Exception {
|
||||
// normalize
|
||||
query = query.toLowerCase();
|
||||
|
||||
AbstractStringMetric metric = new QGramsDistance();
|
||||
|
||||
final List<Entry<SearchResult, Float>> resultSet = new ArrayList<Entry<SearchResult, Float>>();
|
||||
|
||||
for (AnidbSearchResult anime : getAnimeTitles()) {
|
||||
for (String name : new String[] { anime.getMainTitle(), anime.getEnglishTitle() }) {
|
||||
if (name != null) {
|
||||
// normalize
|
||||
name = name.toLowerCase();
|
||||
float similarity = metric.getSimilarity(name, query);
|
||||
|
||||
if (similarity > 0.5 || name.contains(query)) {
|
||||
resultSet.add(new SimpleEntry<SearchResult, Float>(anime, similarity));
|
||||
|
||||
// add only once
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sort by similarity descending (best matches first)
|
||||
Collections.sort(resultSet, new Comparator<Entry<SearchResult, Float>>() {
|
||||
public List<SearchResult> search(String query, final Locale locale) throws Exception {
|
||||
LocalSearch<AnidbSearchResult> index = new LocalSearch<AnidbSearchResult>(getAnimeTitles()) {
|
||||
|
||||
@Override
|
||||
public int compare(Entry<SearchResult, Float> o1, Entry<SearchResult, Float> o2) {
|
||||
return o2.getValue().compareTo(o1.getValue());
|
||||
}
|
||||
});
|
||||
|
||||
// view for the first 20 search results
|
||||
return new AbstractList<SearchResult>() {
|
||||
|
||||
@Override
|
||||
public SearchResult get(int index) {
|
||||
return resultSet.get(index).getKey();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return Math.min(20, resultSet.size());
|
||||
protected Set<String> getFields(AnidbSearchResult anime) {
|
||||
return set(anime.getPrimaryTitle(), anime.getOfficialTitle("en"), anime.getOfficialTitle(locale.getLanguage()));
|
||||
}
|
||||
};
|
||||
|
||||
return new ArrayList<SearchResult>(index.search(query));
|
||||
}
|
||||
|
||||
|
||||
@ -225,8 +180,8 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
|
||||
Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
|
||||
|
||||
Map<Integer, String> primaryTitleMap = new TreeMap<Integer, String>();
|
||||
Map<Integer, String> englishTitleMap = new HashMap<Integer, String>();
|
||||
Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>();
|
||||
Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>();
|
||||
|
||||
// fetch data
|
||||
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
|
||||
@ -236,10 +191,21 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
Matcher matcher = pattern.matcher(scanner.nextLine());
|
||||
|
||||
if (matcher.matches()) {
|
||||
if (matcher.group(2).equals("1")) {
|
||||
primaryTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4));
|
||||
} else if (matcher.group(2).equals("4") && matcher.group(3).equals("en")) {
|
||||
englishTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4));
|
||||
int aid = Integer.parseInt(matcher.group(1));
|
||||
String type = matcher.group(2);
|
||||
String language = matcher.group(3);
|
||||
String title = matcher.group(4);
|
||||
|
||||
if (type.equals("1")) {
|
||||
primaryTitleMap.put(aid, title);
|
||||
} else if (type.equals("4")) {
|
||||
Map<String, String> languageTitleMap = officialTitleMap.get(aid);
|
||||
if (languageTitleMap == null) {
|
||||
languageTitleMap = new HashMap<String, String>();
|
||||
officialTitleMap.put(aid, languageTitleMap);
|
||||
}
|
||||
|
||||
languageTitleMap.put(language, title);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -247,11 +213,11 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
scanner.close();
|
||||
}
|
||||
|
||||
// build up a list of all possible anidb search results
|
||||
// build up a list of all possible AniDB search results
|
||||
anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size());
|
||||
|
||||
for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) {
|
||||
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitleMap.get(entry.getKey())));
|
||||
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), officialTitleMap.get(entry.getKey())));
|
||||
}
|
||||
|
||||
// populate cache
|
||||
@ -264,19 +230,19 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
public static class AnidbSearchResult extends SearchResult implements Serializable {
|
||||
|
||||
protected int aid;
|
||||
protected String mainTitle;
|
||||
protected String englishTitle;
|
||||
protected String primaryTitle; // one per anime
|
||||
protected Map<String, String> officialTitle; // one per language
|
||||
|
||||
|
||||
|
||||
protected AnidbSearchResult() {
|
||||
// used by serializer
|
||||
}
|
||||
|
||||
|
||||
public AnidbSearchResult(int aid, String mainTitle, String englishTitle) {
|
||||
public AnidbSearchResult(int aid, String primaryTitle, Map<String, String> officialTitle) {
|
||||
this.aid = aid;
|
||||
this.mainTitle = mainTitle;
|
||||
this.englishTitle = englishTitle;
|
||||
this.primaryTitle = primaryTitle;
|
||||
this.officialTitle = officialTitle;
|
||||
}
|
||||
|
||||
|
||||
@ -287,17 +253,17 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return mainTitle;
|
||||
return primaryTitle;
|
||||
}
|
||||
|
||||
|
||||
public String getMainTitle() {
|
||||
return mainTitle;
|
||||
public String getPrimaryTitle() {
|
||||
return primaryTitle;
|
||||
}
|
||||
|
||||
|
||||
public String getEnglishTitle() {
|
||||
return englishTitle;
|
||||
public String getOfficialTitle(String key) {
|
||||
return officialTitle != null ? officialTitle.get(key) : null;
|
||||
}
|
||||
}
|
||||
|
||||
|
129
source/net/sourceforge/filebot/web/LocalSearch.java
Normal file
129
source/net/sourceforge/filebot/web/LocalSearch.java
Normal file
@ -0,0 +1,129 @@
|
||||
|
||||
package net.sourceforge.filebot.web;
|
||||
|
||||
|
||||
import static java.util.Collections.*;
|
||||
|
||||
import java.util.AbstractList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
||||
|
||||
|
||||
class LocalSearch<T> {
|
||||
|
||||
private final AbstractStringMetric metric = new QGramsDistance();
|
||||
private final float resultMinimumSimilarity = 0.5f;
|
||||
private final int resultSetSize = 20;
|
||||
|
||||
private final List<T> objects;
|
||||
private final List<Set<String>> fields;
|
||||
|
||||
|
||||
public LocalSearch(Collection<? extends T> data) {
|
||||
objects = new ArrayList<T>(data);
|
||||
fields = new ArrayList<Set<String>>(objects.size());
|
||||
|
||||
for (int i = 0; i < objects.size(); i++) {
|
||||
fields.add(i, getFields(objects.get(i)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public List<T> search(String query) throws ExecutionException, InterruptedException {
|
||||
final String q = normalize(query);
|
||||
List<Callable<Entry<T, Float>>> tasks = new ArrayList<Callable<Entry<T, Float>>>(objects.size());
|
||||
|
||||
for (int i = 0; i < objects.size(); i++) {
|
||||
final int index = i;
|
||||
tasks.add(new Callable<Entry<T, Float>>() {
|
||||
|
||||
@Override
|
||||
public Entry<T, Float> call() throws Exception {
|
||||
float similarity = 0;
|
||||
boolean match = false;
|
||||
|
||||
for (String field : fields.get(index)) {
|
||||
match |= field.contains(q);
|
||||
similarity = Math.max(metric.getSimilarity(q, field), similarity);
|
||||
}
|
||||
|
||||
return match || similarity > resultMinimumSimilarity ? new SimpleEntry<T, Float>(objects.get(index), similarity) : null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
||||
final List<Entry<T, Float>> resultSet = new ArrayList<Entry<T, Float>>(objects.size());
|
||||
|
||||
try {
|
||||
for (Future<Entry<T, Float>> entry : executor.invokeAll(tasks)) {
|
||||
if (entry.get() != null) {
|
||||
resultSet.add(entry.get());
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
executor.shutdown();
|
||||
}
|
||||
|
||||
// sort by similarity descending (best matches first)
|
||||
sort(resultSet, new Comparator<Entry<T, Float>>() {
|
||||
|
||||
@Override
|
||||
public int compare(Entry<T, Float> o1, Entry<T, Float> o2) {
|
||||
return o2.getValue().compareTo(o1.getValue());
|
||||
}
|
||||
});
|
||||
|
||||
// view for the first 20 search results
|
||||
return new AbstractList<T>() {
|
||||
|
||||
@Override
|
||||
public T get(int index) {
|
||||
return resultSet.get(index).getKey();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return Math.min(resultSetSize, resultSet.size());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
protected Set<String> getFields(T object) {
|
||||
return set(object.toString());
|
||||
}
|
||||
|
||||
|
||||
protected Set<String> set(String... values) {
|
||||
Set<String> set = new HashSet<String>(values.length);
|
||||
for (String value : values) {
|
||||
if (value != null) {
|
||||
set.add(normalize(value));
|
||||
}
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
|
||||
protected String normalize(String value) {
|
||||
// normalize separator, normalize case and trim
|
||||
return value.replaceAll("[\\p{Punct}\\p{Space}]+", " ").trim().toLowerCase();
|
||||
}
|
||||
|
||||
}
|
@ -10,16 +10,12 @@ import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.util.AbstractList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.net.ssl.HttpsURLConnection;
|
||||
import javax.swing.Icon;
|
||||
@ -28,9 +24,6 @@ import org.json.simple.JSONArray;
|
||||
import org.json.simple.JSONObject;
|
||||
import org.json.simple.JSONValue;
|
||||
|
||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
||||
|
||||
import net.sf.ehcache.Cache;
|
||||
import net.sf.ehcache.CacheManager;
|
||||
import net.sf.ehcache.Element;
|
||||
@ -63,55 +56,22 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
|
||||
|
||||
|
||||
@Override
|
||||
public List<SearchResult> search(String query, Locale locale) throws IOException {
|
||||
// normalize
|
||||
query = query.toLowerCase();
|
||||
|
||||
AbstractStringMetric metric = new QGramsDistance();
|
||||
|
||||
final List<Entry<SearchResult, Float>> resultSet = new ArrayList<Entry<SearchResult, Float>>();
|
||||
|
||||
for (SerienjunkiesSearchResult anime : getSeriesTitles()) {
|
||||
for (String name : new String[] { anime.getMainTitle(), anime.getGermanTitle() }) {
|
||||
if (name != null) {
|
||||
// normalize
|
||||
name = name.toLowerCase();
|
||||
|
||||
float similarity = metric.getSimilarity(name, query);
|
||||
|
||||
if (similarity > 0.5 || name.contains(query)) {
|
||||
resultSet.add(new SimpleEntry<SearchResult, Float>(anime, similarity));
|
||||
|
||||
// add only once
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sort by similarity descending (best matches first)
|
||||
Collections.sort(resultSet, new Comparator<Entry<SearchResult, Float>>() {
|
||||
|
||||
@Override
|
||||
public int compare(Entry<SearchResult, Float> o1, Entry<SearchResult, Float> o2) {
|
||||
return o2.getValue().compareTo(o1.getValue());
|
||||
}
|
||||
});
|
||||
|
||||
// view for the first 20 search results
|
||||
return new AbstractList<SearchResult>() {
|
||||
|
||||
@Override
|
||||
public SearchResult get(int index) {
|
||||
return resultSet.get(index).getKey();
|
||||
}
|
||||
|
||||
public Locale getDefaultLocale() {
|
||||
return Locale.GERMAN;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<SearchResult> search(String query, Locale locale) throws Exception {
|
||||
LocalSearch<SerienjunkiesSearchResult> index = new LocalSearch<SerienjunkiesSearchResult>(getSeriesTitles()) {
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return Math.min(20, resultSet.size());
|
||||
protected Set<String> getFields(SerienjunkiesSearchResult series) {
|
||||
return set(series.getMainTitle(), series.getGermanTitle());
|
||||
}
|
||||
};
|
||||
|
||||
return new ArrayList<SearchResult>(index.search(query));
|
||||
}
|
||||
|
||||
|
||||
@ -158,6 +118,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
|
||||
// fetch episode data
|
||||
episodes = new ArrayList<Episode>(25);
|
||||
|
||||
String seriesName = locale.equals(Locale.GERMAN) && series.getGermanTitle() != null ? series.getGermanTitle() : series.getMainTitle();
|
||||
JSONObject data = (JSONObject) request("/allepisodes.php?d=" + apikey + "&q=" + series.getSeriesId());
|
||||
JSONArray list = (JSONArray) data.get("allepisodes");
|
||||
|
||||
@ -169,7 +130,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
|
||||
String title = (String) obj.get("german");
|
||||
Date airdate = Date.parse((String) ((JSONObject) obj.get("airdates")).get("premiere"), "yyyy-MM-dd");
|
||||
|
||||
episodes.add(new Episode(series.getName(), series.getStartDate(), season, episode, title, i + 1, null, airdate));
|
||||
episodes.add(new Episode(seriesName, series.getStartDate(), season, episode, title, i + 1, null, airdate));
|
||||
}
|
||||
|
||||
// populate cache
|
||||
@ -241,7 +202,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return germanTitle != null ? germanTitle : mainTitle; // prefer german title
|
||||
return germanTitle != null ? germanTitle : mainTitle; // prefer German title
|
||||
}
|
||||
|
||||
|
||||
|
@ -36,7 +36,7 @@ public class AnidbClientTest {
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
monsterSearchResult = new AnidbSearchResult(1539, "Monster", null);
|
||||
twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", "The Twelve Kingdoms");
|
||||
twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", null);
|
||||
princessTutuSearchResult = new AnidbSearchResult(516, "Princess Tutu", null);
|
||||
}
|
||||
|
||||
@ -49,7 +49,16 @@ public class AnidbClientTest {
|
||||
List<SearchResult> results = anidb.search("one piece");
|
||||
|
||||
AnidbSearchResult result = (AnidbSearchResult) results.get(0);
|
||||
assertEquals("One Piece", result.getName());
|
||||
assertEquals(69, result.getAnimeId());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void searchJapanese() throws Exception {
|
||||
List<SearchResult> results = anidb.search("ワンピース", Locale.JAPANESE);
|
||||
|
||||
AnidbSearchResult result = (AnidbSearchResult) results.get(0);
|
||||
assertEquals("One Piece", result.getName());
|
||||
assertEquals(69, result.getAnimeId());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user