+ support n-alias data files

This commit is contained in:
Reinhard Pointner 2013-09-07 15:48:24 +00:00
parent 3a7769ea2f
commit c227ec4bd9
11 changed files with 222 additions and 240 deletions

View File

@ -157,10 +157,15 @@ if (thetvdb_txt.size() < 30000) { throw new Exception('TheTVDB index sanity fail
// BUILD anidb-index.gz // BUILD anidb-index.gz
def anidb = new net.sourceforge.filebot.web.AnidbClient(null, 0).getAnimeTitles() def anidb = new net.sourceforge.filebot.web.AnidbClient(null, 0).getAnimeTitles()
def anidb_index = anidb.findResults{ [it.getAnimeId(), it.getPrimaryTitle(), it.getEnglishTitle()] } def anidb_index = anidb.findResults{
def row = []
row += it.getAnimeId().pad(5)
row += it.names*.replaceAll(/\s+/, ' ')*.replaceAll(/['`´ʻ]+/, /'/)*.trim().unique()
return row
}
// join and sort // join and sort
def anidb_txt = anidb_index.findResults{ [it[0].pad(5), it[1] ?: '', it[2] == null || it[2].equals(it[1]) ? '' : it[2]]*.replaceAll(/\s+/, ' ')*.trim().join('\t').replaceAll(/['`´ʻ]+/, /'/) }.sort().unique() def anidb_txt = anidb_index.findResults{ row -> row.join('\t') }.sort().unique()
pack(anidb_out, anidb_txt) pack(anidb_out, anidb_txt)
println "AniDB Index: " + anidb_txt.size() println "AniDB Index: " + anidb_txt.size()

View File

@ -2,9 +2,11 @@
package net.sourceforge.filebot; package net.sourceforge.filebot;
import static java.util.Arrays.*; import static java.util.Arrays.asList;
import static java.util.Collections.*; import static java.util.Collections.emptyList;
import static net.sourceforge.filebot.Settings.*; import static net.sourceforge.filebot.Settings.getApplicationName;
import static net.sourceforge.filebot.Settings.getApplicationProperty;
import static net.sourceforge.filebot.Settings.getApplicationVersion;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@ -142,7 +144,7 @@ public final class WebServices {
@Override @Override
protected Set<String> getFields(SearchResult object) { protected Set<String> getFields(SearchResult object) {
return set(object.getName()); return set(object.getNames());
} }
}; };

View File

@ -61,7 +61,6 @@ import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
import net.sourceforge.filebot.similarity.SeriesNameMatcher; import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityComparator; import net.sourceforge.filebot.similarity.SimilarityComparator;
import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.web.AnidbSearchResult;
import net.sourceforge.filebot.web.Date; import net.sourceforge.filebot.web.Date;
import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.Movie; import net.sourceforge.filebot.web.Movie;
@ -377,13 +376,11 @@ public class MediaDetection {
public static synchronized List<Entry<String, SearchResult>> getSeriesIndex() throws IOException { public static synchronized List<Entry<String, SearchResult>> getSeriesIndex() throws IOException {
if (seriesIndex.isEmpty()) { if (seriesIndex.isEmpty()) {
try { try {
for (TheTVDBSearchResult it : releaseInfo.getTheTVDBIndex()) { for (SearchResult[] index : new SearchResult[][] { releaseInfo.getTheTVDBIndex(), releaseInfo.getAnidbIndex() }) {
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(it.getName()).toLowerCase(), it)); for (SearchResult item : index) {
} for (String name : item.getNames()) {
for (AnidbSearchResult it : releaseInfo.getAnidbIndex()) { seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(name).toLowerCase(), item));
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(it.getPrimaryTitle()).toLowerCase(), it)); }
if (it.getEnglishTitle() != null) {
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(it.getEnglishTitle()).toLowerCase(), it));
} }
} }
} catch (Exception e) { } catch (Exception e) {

View File

@ -1,12 +1,15 @@
package net.sourceforge.filebot.media; package net.sourceforge.filebot.media;
import static java.lang.Integer.parseInt;
import static java.util.Arrays.asList; import static java.util.Arrays.asList;
import static java.util.Arrays.copyOfRange;
import static java.util.Collections.unmodifiableMap; import static java.util.Collections.unmodifiableMap;
import static java.util.ResourceBundle.getBundle; import static java.util.ResourceBundle.getBundle;
import static java.util.regex.Pattern.CASE_INSENSITIVE; import static java.util.regex.Pattern.CASE_INSENSITIVE;
import static java.util.regex.Pattern.UNICODE_CASE; import static java.util.regex.Pattern.UNICODE_CASE;
import static java.util.regex.Pattern.compile; import static java.util.regex.Pattern.compile;
import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation; import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation;
import static net.sourceforge.tuned.FileUtilities.readCSV;
import static net.sourceforge.tuned.StringUtilities.join; import static net.sourceforge.tuned.StringUtilities.join;
import java.io.File; import java.io.File;
@ -26,7 +29,6 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Scanner;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -274,14 +276,15 @@ public class ReleaseInfo {
@Override @Override
public Movie[] process(ByteBuffer data) throws IOException { public Movie[] process(ByteBuffer data) throws IOException {
Scanner scanner = new Scanner(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n"); List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
List<Movie> movies = new ArrayList<Movie>(rows.size());
List<Movie> movies = new ArrayList<Movie>(); for (String[] row : rows) {
while (scanner.hasNext()) { int imdbid = parseInt(row[0]);
int imdbid = scanner.nextInt(); int year = parseInt(row[1]);
String name = scanner.next().trim(); String name = row[2];
int year = scanner.nextInt(); String[] aliasNames = copyOfRange(row, 3, row.length);
movies.add(new Movie(name, year, imdbid, -1)); movies.add(new Movie(name, aliasNames, year, imdbid, -1));
} }
return movies.toArray(new Movie[0]); return movies.toArray(new Movie[0]);
@ -296,13 +299,14 @@ public class ReleaseInfo {
@Override @Override
public TheTVDBSearchResult[] process(ByteBuffer data) throws IOException { public TheTVDBSearchResult[] process(ByteBuffer data) throws IOException {
Scanner scanner = new Scanner(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n"); List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
List<TheTVDBSearchResult> tvshows = new ArrayList<TheTVDBSearchResult>(rows.size());
List<TheTVDBSearchResult> tvshows = new ArrayList<TheTVDBSearchResult>(); for (String[] row : rows) {
while (scanner.hasNext() && scanner.hasNextInt()) { int id = parseInt(row[0]);
int id = scanner.nextInt(); String name = row[1];
String name = scanner.next().trim(); String[] aliasNames = copyOfRange(row, 2, row.length);
tvshows.add(new TheTVDBSearchResult(name, id)); tvshows.add(new TheTVDBSearchResult(name, aliasNames, id));
} }
return tvshows.toArray(new TheTVDBSearchResult[0]); return tvshows.toArray(new TheTVDBSearchResult[0]);
@ -317,15 +321,14 @@ public class ReleaseInfo {
@Override @Override
public AnidbSearchResult[] process(ByteBuffer data) throws IOException { public AnidbSearchResult[] process(ByteBuffer data) throws IOException {
Scanner scanner = new Scanner(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n"); List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
List<AnidbSearchResult> anime = new ArrayList<AnidbSearchResult>(rows.size());
List<AnidbSearchResult> anime = new ArrayList<AnidbSearchResult>(); for (String[] row : rows) {
while (scanner.hasNext() && scanner.hasNextInt()) { int aid = parseInt(row[0]);
int aid = scanner.nextInt(); String primaryTitle = row[1];
String primaryTitle = scanner.next().trim(); String[] aliasNames = copyOfRange(row, 2, row.length);
String englishTitle = scanner.next().trim(); anime.add(new AnidbSearchResult(aid, primaryTitle, aliasNames));
anime.add(new AnidbSearchResult(aid, primaryTitle, englishTitle.isEmpty() ? null : englishTitle));
} }
return anime.toArray(new AnidbSearchResult[0]); return anime.toArray(new AnidbSearchResult[0]);

View File

@ -1,10 +1,12 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.EpisodeUtilities.sortEpisodes;
import static net.sourceforge.filebot.web.EpisodeUtilities.*; import static net.sourceforge.filebot.web.WebRequest.getDocument;
import static net.sourceforge.filebot.web.WebRequest.*; import static net.sourceforge.tuned.XPathUtilities.getAttribute;
import static net.sourceforge.tuned.XPathUtilities.*; import static net.sourceforge.tuned.XPathUtilities.getChild;
import static net.sourceforge.tuned.XPathUtilities.getTextContent;
import static net.sourceforge.tuned.XPathUtilities.selectNodes;
import static net.sourceforge.tuned.XPathUtilities.selectString;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
@ -30,108 +32,98 @@ import net.sourceforge.filebot.ResourceManager;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Node; import org.w3c.dom.Node;
public class AnidbClient extends AbstractEpisodeListProvider { public class AnidbClient extends AbstractEpisodeListProvider {
private static final FloodLimit REQUEST_LIMIT = new FloodLimit(5, 12, TimeUnit.SECONDS); // no more than 5 requests within a 10 second window (+2 seconds for good measure) private static final FloodLimit REQUEST_LIMIT = new FloodLimit(5, 12, TimeUnit.SECONDS); // no more than 5 requests within a 10 second window (+2 seconds for good measure)
private final String host = "anidb.net"; private final String host = "anidb.net";
private final String client; private final String client;
private final int clientver; private final int clientver;
public AnidbClient(String client, int clientver) { public AnidbClient(String client, int clientver) {
this.client = client; this.client = client;
this.clientver = clientver; this.clientver = clientver;
} }
@Override @Override
public String getName() { public String getName() {
return "AniDB"; return "AniDB";
} }
@Override @Override
public Icon getIcon() { public Icon getIcon() {
return ResourceManager.getIcon("search.anidb"); return ResourceManager.getIcon("search.anidb");
} }
@Override @Override
public boolean hasSingleSeasonSupport() { public boolean hasSingleSeasonSupport() {
return false; return false;
} }
@Override @Override
public boolean hasLocaleSupport() { public boolean hasLocaleSupport() {
return true; return true;
} }
@Override @Override
public ResultCache getCache() { public ResultCache getCache() {
return new ResultCache(host, Cache.getCache("web-datasource-lv2")); return new ResultCache(host, Cache.getCache("web-datasource-lv2"));
} }
@Override @Override
public List<SearchResult> search(String query, final Locale locale) throws Exception { public List<SearchResult> search(String query, final Locale locale) throws Exception {
// bypass automatic caching since search is based on locally cached data anyway // bypass automatic caching since search is based on locally cached data anyway
return fetchSearchResult(query, locale); return fetchSearchResult(query, locale);
} }
@Override @Override
public List<SearchResult> fetchSearchResult(String query, final Locale locale) throws Exception { public List<SearchResult> fetchSearchResult(String query, final Locale locale) throws Exception {
LocalSearch<AnidbSearchResult> index = new LocalSearch<AnidbSearchResult>(getAnimeTitles()) { LocalSearch<SearchResult> index = new LocalSearch<SearchResult>(getAnimeTitles()) {
@Override @Override
protected Set<String> getFields(AnidbSearchResult anime) { protected Set<String> getFields(SearchResult it) {
return set(anime.getPrimaryTitle(), anime.getEnglishTitle()); return set(it.getNames());
} }
}; };
return new ArrayList<SearchResult>(index.search(query)); return new ArrayList<SearchResult>(index.search(query));
} }
@Override @Override
public List<Episode> fetchEpisodeList(SearchResult searchResult, SortOrder sortOrder, Locale language) throws Exception { public List<Episode> fetchEpisodeList(SearchResult searchResult, SortOrder sortOrder, Locale language) throws Exception {
AnidbSearchResult anime = (AnidbSearchResult) searchResult; AnidbSearchResult anime = (AnidbSearchResult) searchResult;
// e.g. http://api.anidb.net:9001/httpapi?request=anime&client=filebot&clientver=1&protover=1&aid=4521 // e.g. http://api.anidb.net:9001/httpapi?request=anime&client=filebot&clientver=1&protover=1&aid=4521
URL url = new URL("http", "api." + host, 9001, "/httpapi?request=anime&client=" + client + "&clientver=" + clientver + "&protover=1&aid=" + anime.getAnimeId()); URL url = new URL("http", "api." + host, 9001, "/httpapi?request=anime&client=" + client + "&clientver=" + clientver + "&protover=1&aid=" + anime.getAnimeId());
// respect flood protection limits // respect flood protection limits
REQUEST_LIMIT.acquirePermit(); REQUEST_LIMIT.acquirePermit();
// get anime page as xml // get anime page as xml
Document dom = getDocument(url); Document dom = getDocument(url);
// select main title and anime start date // select main title and anime start date
Date seriesStartDate = Date.parse(selectString("//startdate", dom), "yyyy-MM-dd"); Date seriesStartDate = Date.parse(selectString("//startdate", dom), "yyyy-MM-dd");
String animeTitle = selectString("//titles/title[@type='official' and @lang='" + language.getLanguage() + "']", dom); String animeTitle = selectString("//titles/title[@type='official' and @lang='" + language.getLanguage() + "']", dom);
if (animeTitle.isEmpty()) { if (animeTitle.isEmpty()) {
animeTitle = selectString("//titles/title[@type='main']", dom); animeTitle = selectString("//titles/title[@type='main']", dom);
} }
List<Episode> episodes = new ArrayList<Episode>(25); List<Episode> episodes = new ArrayList<Episode>(25);
for (Node node : selectNodes("//episode", dom)) { for (Node node : selectNodes("//episode", dom)) {
Node epno = getChild("epno", node); Node epno = getChild("epno", node);
int number = Integer.parseInt(getTextContent(epno).replaceAll("\\D", "")); int number = Integer.parseInt(getTextContent(epno).replaceAll("\\D", ""));
int type = Integer.parseInt(getAttribute("type", epno)); int type = Integer.parseInt(getAttribute("type", epno));
if (type == 1 || type == 2) { if (type == 1 || type == 2) {
Date airdate = Date.parse(getTextContent("airdate", node), "yyyy-MM-dd"); Date airdate = Date.parse(getTextContent("airdate", node), "yyyy-MM-dd");
String title = selectString(".//title[@lang='" + language.getLanguage() + "']", node); String title = selectString(".//title[@lang='" + language.getLanguage() + "']", node);
if (title.isEmpty()) { // English language fall-back if (title.isEmpty()) { // English language fall-back
title = selectString(".//title[@lang='en']", node); title = selectString(".//title[@lang='en']", node);
} }
if (type == 1) { if (type == 1) {
episodes.add(new Episode(animeTitle, seriesStartDate, null, number, title, number, null, airdate, searchResult)); // normal episode, no seasons for anime episodes.add(new Episode(animeTitle, seriesStartDate, null, number, title, number, null, airdate, searchResult)); // normal episode, no seasons for anime
} else { } else {
@ -139,20 +131,19 @@ public class AnidbClient extends AbstractEpisodeListProvider {
} }
} }
} }
// make sure episodes are ordered correctly // make sure episodes are ordered correctly
sortEpisodes(episodes); sortEpisodes(episodes);
// sanity check // sanity check
if (episodes.isEmpty()) { if (episodes.isEmpty()) {
// anime page xml doesn't work sometimes // anime page xml doesn't work sometimes
throw new RuntimeException(String.format("Failed to parse episode data from xml: %s (%d)", anime, anime.getAnimeId())); throw new RuntimeException(String.format("Failed to parse episode data from xml: %s (%d)", anime, anime.getAnimeId()));
} }
return episodes; return episodes;
} }
@Override @Override
public URI getEpisodeListLink(SearchResult searchResult) { public URI getEpisodeListLink(SearchResult searchResult) {
try { try {
@ -161,39 +152,38 @@ public class AnidbClient extends AbstractEpisodeListProvider {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
public synchronized List<AnidbSearchResult> getAnimeTitles() throws Exception { public synchronized List<AnidbSearchResult> getAnimeTitles() throws Exception {
URL url = new URL("http", host, "/api/anime-titles.dat.gz"); URL url = new URL("http", host, "/api/anime-titles.dat.gz");
ResultCache cache = getCache(); ResultCache cache = getCache();
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
List<AnidbSearchResult> anime = (List) cache.getSearchResult(null, Locale.ROOT); List<AnidbSearchResult> anime = (List) cache.getSearchResult(null, Locale.ROOT);
if (anime != null) { if (anime != null) {
return anime; return anime;
} }
// <aid>|<type>|<language>|<title> // <aid>|<type>|<language>|<title>
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language) // type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$"); Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>(); Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>();
Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>(); Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>();
Map<Integer, Map<String, String>> synonymsTitleMap = new HashMap<Integer, Map<String, String>>(); Map<Integer, Map<String, String>> synonymsTitleMap = new HashMap<Integer, Map<String, String>>();
// fetch data // fetch data
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8"); Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
try { try {
while (scanner.hasNextLine()) { while (scanner.hasNextLine()) {
Matcher matcher = pattern.matcher(scanner.nextLine()); Matcher matcher = pattern.matcher(scanner.nextLine());
if (matcher.matches()) { if (matcher.matches()) {
int aid = Integer.parseInt(matcher.group(1)); int aid = Integer.parseInt(matcher.group(1));
String type = matcher.group(2); String type = matcher.group(2);
String language = matcher.group(3); String language = matcher.group(3);
String title = matcher.group(4); String title = matcher.group(4);
if (type.equals("1")) { if (type.equals("1")) {
primaryTitleMap.put(aid, title); primaryTitleMap.put(aid, title);
} else if (type.equals("2") || type.equals("4")) { } else if (type.equals("2") || type.equals("4")) {
@ -203,7 +193,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
languageTitleMap = new HashMap<String, String>(); languageTitleMap = new HashMap<String, String>();
titleMap.put(aid, languageTitleMap); titleMap.put(aid, languageTitleMap);
} }
languageTitleMap.put(language, title); languageTitleMap.put(language, title);
} }
} }
@ -211,10 +201,10 @@ public class AnidbClient extends AbstractEpisodeListProvider {
} finally { } finally {
scanner.close(); scanner.close();
} }
// build up a list of all possible AniDB search results // build up a list of all possible AniDB search results
anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size()); anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size());
for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) { for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) {
Map<String, String> localizedTitles = new HashMap<String, String>(); Map<String, String> localizedTitles = new HashMap<String, String>();
if (synonymsTitleMap.containsKey(entry.getKey())) { if (synonymsTitleMap.containsKey(entry.getKey())) {
@ -223,12 +213,13 @@ public class AnidbClient extends AbstractEpisodeListProvider {
if (officialTitleMap.containsKey(entry.getKey())) { if (officialTitleMap.containsKey(entry.getKey())) {
localizedTitles.putAll(officialTitleMap.get(entry.getKey())); // primarily use official title if available localizedTitles.putAll(officialTitleMap.get(entry.getKey())); // primarily use official title if available
} }
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), localizedTitles.get("en"))); String englishTitle = localizedTitles.get("en"); // ONLY SUPPORT ENGLISH LOCALIZATION
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitle == null || englishTitle.isEmpty() ? new String[] {} : new String[] { englishTitle }));
} }
// populate cache // populate cache
return cache.putSearchResult(null, Locale.ROOT, anime); return cache.putSearchResult(null, Locale.ROOT, anime);
} }
} }

View File

@ -8,8 +8,8 @@ public class AnidbSearchResult extends SearchResult {
// used by serializer // used by serializer
} }
public AnidbSearchResult(int aid, String primaryTitle, String englishTitle) { public AnidbSearchResult(int aid, String primaryTitle, String[] localizedTitles) {
super(primaryTitle, englishTitle); super(primaryTitle, localizedTitles);
this.aid = aid; this.aid = aid;
} }
@ -30,10 +30,6 @@ public class AnidbSearchResult extends SearchResult {
return name; return name;
} }
public String getEnglishTitle() {
return aliasNames.length > 0 ? aliasNames[0] : null;
}
@Override @Override
public int hashCode() { public int hashCode() {
return aid; return aid;

View File

@ -1,9 +1,8 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import static java.util.Collections.singleton;
import static java.util.Collections.*; import static java.util.Collections.sort;
import static net.sourceforge.filebot.similarity.Normalization.*; import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation;
import java.util.AbstractList; import java.util.AbstractList;
import java.util.AbstractMap.SimpleEntry; import java.util.AbstractMap.SimpleEntry;
@ -25,61 +24,58 @@ import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
import com.ibm.icu.text.Transliterator; import com.ibm.icu.text.Transliterator;
public class LocalSearch<T> { public class LocalSearch<T> {
private final AbstractStringMetric metric = new QGramsDistance(); private final AbstractStringMetric metric = new QGramsDistance();
private float resultMinimumSimilarity = 0.5f; private float resultMinimumSimilarity = 0.5f;
private int resultSetSize = 20; private int resultSetSize = 20;
private final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove"); private final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
private final List<T> objects; private final List<T> objects;
private final List<Set<String>> fields; private final List<Set<String>> fields;
public LocalSearch(Collection<? extends T> data) { public LocalSearch(Collection<? extends T> data) {
objects = new ArrayList<T>(data); objects = new ArrayList<T>(data);
fields = new ArrayList<Set<String>>(objects.size()); fields = new ArrayList<Set<String>>(objects.size());
for (int i = 0; i < objects.size(); i++) { for (int i = 0; i < objects.size(); i++) {
fields.add(i, getFields(objects.get(i))); fields.add(i, getFields(objects.get(i)));
} }
} }
public List<T> search(String query) throws ExecutionException, InterruptedException { public List<T> search(String query) throws ExecutionException, InterruptedException {
final String q = normalize(query); final String q = normalize(query);
List<Callable<Entry<T, Float>>> tasks = new ArrayList<Callable<Entry<T, Float>>>(objects.size()); List<Callable<Entry<T, Float>>> tasks = new ArrayList<Callable<Entry<T, Float>>>(objects.size());
for (int i = 0; i < objects.size(); i++) { for (int i = 0; i < objects.size(); i++) {
final int index = i; final int index = i;
tasks.add(new Callable<Entry<T, Float>>() { tasks.add(new Callable<Entry<T, Float>>() {
@Override @Override
public Entry<T, Float> call() throws Exception { public Entry<T, Float> call() throws Exception {
float similarity = 0; float similarity = 0;
boolean match = false; boolean match = false;
for (String field : fields.get(index)) { for (String field : fields.get(index)) {
match |= field.contains(q); match |= field.contains(q);
similarity = Math.max(metric.getSimilarity(q, field), similarity); similarity = Math.max(metric.getSimilarity(q, field), similarity);
} }
return match || similarity > resultMinimumSimilarity ? new SimpleEntry<T, Float>(objects.get(index), similarity) : null; return match || similarity > resultMinimumSimilarity ? new SimpleEntry<T, Float>(objects.get(index), similarity) : null;
} }
}); });
} }
ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
final List<Entry<T, Float>> resultSet = new ArrayList<Entry<T, Float>>(objects.size()); final List<Entry<T, Float>> resultSet = new ArrayList<Entry<T, Float>>(objects.size());
try { try {
for (Future<Entry<T, Float>> entry : executor.invokeAll(tasks)) { for (Future<Entry<T, Float>> entry : executor.invokeAll(tasks)) {
if (entry.get() != null) { if (entry.get() != null) {
resultSet.add(entry.get()); resultSet.add(entry.get());
} }
if (Thread.interrupted()) { if (Thread.interrupted()) {
throw new InterruptedException(); throw new InterruptedException();
} }
@ -87,50 +83,45 @@ public class LocalSearch<T> {
} finally { } finally {
executor.shutdownNow(); executor.shutdownNow();
} }
// sort by similarity descending (best matches first) // sort by similarity descending (best matches first)
sort(resultSet, new Comparator<Entry<T, Float>>() { sort(resultSet, new Comparator<Entry<T, Float>>() {
@Override @Override
public int compare(Entry<T, Float> o1, Entry<T, Float> o2) { public int compare(Entry<T, Float> o1, Entry<T, Float> o2) {
return o2.getValue().compareTo(o1.getValue()); return o2.getValue().compareTo(o1.getValue());
} }
}); });
// view for the first 20 search results // view for the first 20 search results
return new AbstractList<T>() { return new AbstractList<T>() {
@Override @Override
public T get(int index) { public T get(int index) {
return resultSet.get(index).getKey(); return resultSet.get(index).getKey();
} }
@Override @Override
public int size() { public int size() {
return Math.min(resultSetSize, resultSet.size()); return Math.min(resultSetSize, resultSet.size());
} }
}; };
} }
public void setResultMinimumSimilarity(float resultMinimumSimilarity) { public void setResultMinimumSimilarity(float resultMinimumSimilarity) {
this.resultMinimumSimilarity = resultMinimumSimilarity; this.resultMinimumSimilarity = resultMinimumSimilarity;
} }
public void setResultSetSize(int resultSetSize) { public void setResultSetSize(int resultSetSize) {
this.resultSetSize = resultSetSize; this.resultSetSize = resultSetSize;
} }
protected Set<String> getFields(T object) { protected Set<String> getFields(T object) {
return set(object.toString()); return set(singleton(object.toString()));
} }
protected Set<String> set(Collection<String> values) {
protected Set<String> set(String... values) { Set<String> set = new HashSet<String>(values.size());
Set<String> set = new HashSet<String>(values.length);
for (String value : values) { for (String value : values) {
if (value != null) { if (value != null) {
set.add(normalize(value)); set.add(normalize(value));
@ -138,11 +129,10 @@ public class LocalSearch<T> {
} }
return set; return set;
} }
protected String normalize(String value) { protected String normalize(String value) {
// normalize separator, normalize case and trim // normalize separator, normalize case and trim
return normalizePunctuation(transliterator.transform(value)).toLowerCase(); return normalizePunctuation(transliterator.transform(value)).toLowerCase();
} }
} }

View File

@ -1,6 +1,8 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import java.io.Serializable; import java.io.Serializable;
import java.util.AbstractList;
import java.util.List;
public abstract class SearchResult implements Serializable { public abstract class SearchResult implements Serializable {
@ -24,6 +26,21 @@ public abstract class SearchResult implements Serializable {
return aliasNames.clone(); return aliasNames.clone();
} }
/**
 * Returns all names of this search result as a single list: the primary
 * {@code name} at position 0, followed by every entry of {@code aliasNames}
 * in array order.
 *
 * The returned list is a lightweight view, not a copy: each call to
 * {@code get} / {@code size} reads the {@code name} and {@code aliasNames}
 * fields directly. Only {@code get} and {@code size} are implemented on top
 * of {@link AbstractList}, and no null filtering is applied to the elements.
 *
 * @return list view of size {@code 1 + aliasNames.length}
 */
public List<String> getNames() {
	return new AbstractList<String>() {

		@Override
		public int size() {
			// primary name plus all aliases
			return aliasNames.length + 1;
		}

		@Override
		public String get(int position) {
			// slot 0 is reserved for the primary name
			if (position == 0) {
				return name;
			}
			// remaining slots map onto the alias array, shifted by one
			return aliasNames[position - 1];
		}
	};
}
@Override @Override
public String toString() { public String toString() {
return name; return name;

View File

@ -1,15 +1,15 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.EpisodeUtilities.sortEpisodes;
import static net.sourceforge.filebot.web.EpisodeUtilities.*; import static net.sourceforge.filebot.web.WebRequest.createIgnoreCertificateSocketFactory;
import static net.sourceforge.filebot.web.WebRequest.*; import static net.sourceforge.filebot.web.WebRequest.getReader;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.net.URI; import java.net.URI;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Set; import java.util.Set;
@ -24,142 +24,140 @@ import org.json.simple.JSONArray;
import org.json.simple.JSONObject; import org.json.simple.JSONObject;
import org.json.simple.JSONValue; import org.json.simple.JSONValue;
public class SerienjunkiesClient extends AbstractEpisodeListProvider { public class SerienjunkiesClient extends AbstractEpisodeListProvider {
private final String host = "api.serienjunkies.de"; private final String host = "api.serienjunkies.de";
private final String apikey; private final String apikey;
public SerienjunkiesClient(String apikey) { public SerienjunkiesClient(String apikey) {
this.apikey = apikey; this.apikey = apikey;
} }
@Override @Override
public String getName() { public String getName() {
return "Serienjunkies"; return "Serienjunkies";
} }
@Override @Override
public Icon getIcon() { public Icon getIcon() {
return ResourceManager.getIcon("search.serienjunkies"); return ResourceManager.getIcon("search.serienjunkies");
} }
@Override @Override
public Locale getDefaultLocale() { public Locale getDefaultLocale() {
return Locale.GERMAN; return Locale.GERMAN;
} }
@Override @Override
public ResultCache getCache() { public ResultCache getCache() {
return new ResultCache(host, Cache.getCache("web-datasource")); return new ResultCache(host, Cache.getCache("web-datasource"));
} }
@Override @Override
public List<SearchResult> search(String query, final Locale locale) throws Exception { public List<SearchResult> search(String query, final Locale locale) throws Exception {
// bypass automatic caching since search is based on locally cached data anyway // bypass automatic caching since search is based on locally cached data anyway
return fetchSearchResult(query, locale); return fetchSearchResult(query, locale);
} }
@Override @Override
public List<SearchResult> fetchSearchResult(String query, Locale locale) throws Exception { public List<SearchResult> fetchSearchResult(String query, Locale locale) throws Exception {
LocalSearch<SerienjunkiesSearchResult> index = new LocalSearch<SerienjunkiesSearchResult>(getSeriesTitles()) { LocalSearch<SearchResult> index = new LocalSearch<SearchResult>(getSeriesTitles()) {
@Override @Override
protected Set<String> getFields(SerienjunkiesSearchResult series) { protected Set<String> getFields(SearchResult series) {
return set(series.getMainTitle(), series.getGermanTitle()); return set(series.getNames());
} }
}; };
return new ArrayList<SearchResult>(index.search(query)); return new ArrayList<SearchResult>(index.search(query));
} }
protected synchronized List<SerienjunkiesSearchResult> getSeriesTitles() throws IOException { protected synchronized List<SerienjunkiesSearchResult> getSeriesTitles() throws IOException {
ResultCache cache = getCache(); ResultCache cache = getCache();
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
List<SerienjunkiesSearchResult> seriesList = (List) cache.getSearchResult(null, Locale.ROOT); List<SerienjunkiesSearchResult> seriesList = (List) cache.getSearchResult(null, Locale.ROOT);
if (seriesList != null) { if (seriesList != null) {
return seriesList; return seriesList;
} }
// fetch series data // fetch series data
seriesList = new ArrayList<SerienjunkiesSearchResult>(); seriesList = new ArrayList<SerienjunkiesSearchResult>();
JSONObject data = (JSONObject) request("/allseries.php?d=" + apikey); JSONObject data = (JSONObject) request("/allseries.php?d=" + apikey);
JSONArray list = (JSONArray) data.get("allseries"); JSONArray list = (JSONArray) data.get("allseries");
for (Object element : list) { for (Object element : list) {
JSONObject obj = (JSONObject) element; JSONObject obj = (JSONObject) element;
Integer sid = new Integer((String) obj.get("id")); Integer sid = new Integer((String) obj.get("id"));
String link = (String) obj.get("link"); String link = (String) obj.get("link");
String mainTitle = (String) obj.get("short"); String mainTitle = (String) obj.get("short");
String germanTitle = (String) obj.get("short_german"); String germanTitle = (String) obj.get("short_german");
Date startDate = Date.parse((String) obj.get("firstepisode"), "yyyy-MM-dd"); Date startDate = Date.parse((String) obj.get("firstepisode"), "yyyy-MM-dd");
seriesList.add(new SerienjunkiesSearchResult(sid, link, mainTitle, germanTitle != null && !germanTitle.isEmpty() ? germanTitle : null, startDate)); Set<String> titleSet = new LinkedHashSet<String>(2);
for (String title : new String[] { germanTitle, mainTitle }) {
if (title != null && title.length() > 0) {
titleSet.add(title);
}
}
if (titleSet.size() > 0) {
List<String> titleList = new ArrayList<String>(titleSet);
seriesList.add(new SerienjunkiesSearchResult(sid, link, titleList.get(0), titleList.subList(1, titleList.size()).toArray(new String[0]), startDate));
}
} }
// populate cache // populate cache
return cache.putSearchResult(null, Locale.ROOT, seriesList); return cache.putSearchResult(null, Locale.ROOT, seriesList);
} }
@Override @Override
public List<Episode> fetchEpisodeList(SearchResult searchResult, SortOrder sortOrder, Locale locale) throws IOException { public List<Episode> fetchEpisodeList(SearchResult searchResult, SortOrder sortOrder, Locale locale) throws IOException {
SerienjunkiesSearchResult series = (SerienjunkiesSearchResult) searchResult; SerienjunkiesSearchResult series = (SerienjunkiesSearchResult) searchResult;
// fetch episode data // fetch episode data
List<Episode> episodes = new ArrayList<Episode>(25); List<Episode> episodes = new ArrayList<Episode>(25);
String seriesName = locale.equals(Locale.GERMAN) && series.getGermanTitle() != null ? series.getGermanTitle() : series.getMainTitle(); String seriesName = series.getName();
JSONObject data = (JSONObject) request("/allepisodes.php?d=" + apikey + "&q=" + series.getSeriesId()); JSONObject data = (JSONObject) request("/allepisodes.php?d=" + apikey + "&q=" + series.getSeriesId());
JSONArray list = (JSONArray) data.get("allepisodes"); JSONArray list = (JSONArray) data.get("allepisodes");
for (int i = 0; i < list.size(); i++) { for (int i = 0; i < list.size(); i++) {
JSONObject obj = (JSONObject) list.get(i); JSONObject obj = (JSONObject) list.get(i);
Integer season = new Integer((String) obj.get("season")); Integer season = new Integer((String) obj.get("season"));
Integer episode = new Integer((String) obj.get("episode")); Integer episode = new Integer((String) obj.get("episode"));
Date airdate = Date.parse((String) ((JSONObject) obj.get("airdates")).get("premiere"), "yyyy-MM-dd"); Date airdate = Date.parse((String) ((JSONObject) obj.get("airdates")).get("premiere"), "yyyy-MM-dd");
String title = (String) obj.get("original"); String title = (String) obj.get("original");
String german = (String) obj.get("german"); String german = (String) obj.get("german");
if (title == null || (Locale.GERMAN.equals(locale) && german != null)) { if (title == null || (Locale.GERMAN.equals(locale) && german != null)) {
title = german; title = german;
} }
// enforce sanity // enforce sanity
if (title == null) { if (title == null) {
title = ""; title = "";
} }
episodes.add(new Episode(seriesName, series.getStartDate(), season, episode, title, i + 1, null, airdate, searchResult)); episodes.add(new Episode(seriesName, series.getStartDate(), season, episode, title, i + 1, null, airdate, searchResult));
} }
// make sure episodes are in ordered correctly // make sure episodes are in ordered correctly
sortEpisodes(episodes); sortEpisodes(episodes);
return episodes; return episodes;
} }
protected Object request(String resource) throws IOException { protected Object request(String resource) throws IOException {
URL url = new URL("https", host, resource); URL url = new URL("https", host, resource);
HttpsURLConnection connection = (HttpsURLConnection) url.openConnection(); HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
// disable SSL certificate validation // disable SSL certificate validation
connection.setSSLSocketFactory(createIgnoreCertificateSocketFactory()); connection.setSSLSocketFactory(createIgnoreCertificateSocketFactory());
// fetch and parse JSON data // fetch and parse JSON data
Reader reader = getReader(connection); Reader reader = getReader(connection);
try { try {
@ -168,11 +166,10 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
reader.close(); reader.close();
} }
} }
@Override @Override
public URI getEpisodeListLink(SearchResult searchResult) { public URI getEpisodeListLink(SearchResult searchResult) {
return URI.create(String.format("http://www.serienjunkies.de/%s/alle-serien-staffeln.html", ((SerienjunkiesSearchResult) searchResult).getLink())); return URI.create(String.format("http://www.serienjunkies.de/%s/alle-serien-staffeln.html", ((SerienjunkiesSearchResult) searchResult).getLink()));
} }
} }

View File

@ -1,79 +1,50 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
public class SerienjunkiesSearchResult extends SearchResult { public class SerienjunkiesSearchResult extends SearchResult {
protected int sid; protected int sid;
protected String link; protected String link;
protected String mainTitle;
protected String germanTitle;
protected Date startDate; protected Date startDate;
protected SerienjunkiesSearchResult() { protected SerienjunkiesSearchResult() {
// used by serializer // used by serializer
} }
public SerienjunkiesSearchResult(int sid, String link, String germanTitle, String[] otherTitles, Date startDate) {
public SerienjunkiesSearchResult(int sid, String link, String mainTitle, String germanTitle, Date startDate) { super(germanTitle, otherTitles);
this.sid = sid; this.sid = sid;
this.link = link; this.link = link;
this.mainTitle = mainTitle;
this.germanTitle = germanTitle;
this.startDate = startDate; this.startDate = startDate;
} }
public int getId() { public int getId() {
return sid; return sid;
} }
@Override
public String getName() {
return germanTitle != null ? germanTitle : mainTitle; // prefer German title
}
public int getSeriesId() { public int getSeriesId() {
return sid; return sid;
} }
public String getLink() { public String getLink() {
return link; return link;
} }
public String getMainTitle() {
return mainTitle;
}
public String getGermanTitle() {
return germanTitle;
}
public Date getStartDate() { public Date getStartDate() {
return startDate; return startDate;
} }
@Override @Override
public int hashCode() { public int hashCode() {
return sid; return sid;
} }
@Override @Override
public boolean equals(Object object) { public boolean equals(Object object) {
if (object instanceof SerienjunkiesSearchResult) { if (object instanceof SerienjunkiesSearchResult) {
SerienjunkiesSearchResult other = (SerienjunkiesSearchResult) object; SerienjunkiesSearchResult other = (SerienjunkiesSearchResult) object;
return this.sid == other.sid; return this.sid == other.sid;
} }
return false; return false;
} }
} }

View File

@ -21,6 +21,7 @@ import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Scanner;
import java.util.SortedMap; import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -160,6 +161,18 @@ public final class FileUtilities {
} }
} }
/**
 * Reads the given stream line by line and splits each line into fields.
 *
 * Fields are produced by {@link Pattern#split(CharSequence)}, so trailing empty
 * fields of a line are dropped and an empty line yields a single empty field.
 * The stream is not closed by this method.
 *
 * @param source
 *            stream to read from
 * @param charsetName
 *            name of the charset used to decode the stream
 * @param separatorPattern
 *            regular expression that separates the fields of a line
 * @return one field array per line, in input order
 */
public static List<String[]> readCSV(InputStream source, String charsetName, String separatorPattern) {
	Scanner lines = new Scanner(source, charsetName);
	Pattern delimiter = Pattern.compile(separatorPattern);

	// generous initial capacity since the input may have many rows
	List<String[]> table = new ArrayList<String[]>(65536);

	while (lines.hasNextLine()) {
		String line = lines.nextLine();
		String[] fields = delimiter.split(line);
		table.add(fields);
	}

	return table;
}
public static Reader createTextReader(File file) throws IOException { public static Reader createTextReader(File file) throws IOException {
CharsetDetector detector = new CharsetDetector(); CharsetDetector detector = new CharsetDetector();
detector.setDeclaredEncoding("UTF-8"); // small boost for UTF-8 as default encoding detector.setDeclaredEncoding("UTF-8"); // small boost for UTF-8 as default encoding