* use "matching title" in anidb search results
* use official english anime title
* much faster episode information extraction (less xpath)
This commit is contained in:
parent
7601be3b46
commit
ec4254e687
|
@ -281,11 +281,14 @@ public class SeriesNameMatcher {
|
|||
}
|
||||
|
||||
|
||||
protected String[] names(List<File> files) {
|
||||
protected String[] names(Collection<File> files) {
|
||||
String[] names = new String[files.size()];
|
||||
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
names[i] = FileUtilities.getName(files.get(i));
|
||||
int i = 0;
|
||||
|
||||
// fill array
|
||||
for (File file : files) {
|
||||
names[i++] = FileUtilities.getName(file);
|
||||
}
|
||||
|
||||
return names;
|
||||
|
|
|
@ -12,6 +12,7 @@ import java.net.URL;
|
|||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
|
@ -43,47 +44,80 @@ public class AnidbClient implements EpisodeListProvider {
|
|||
|
||||
@Override
|
||||
public List<SearchResult> search(String query) throws IOException, SAXException {
|
||||
|
||||
// Air Status: ignore
|
||||
// Anime Type: TV Series, TV Special, OVA
|
||||
// Hide Synonyms: true
|
||||
URL searchUrl = new URL("http", host, "/perl-bin/animedb.pl?type.tvspecial=1&type.tvseries=1&type.ova=1&show=animelist&orderby.name=0.1&noalias=1&do.update=update&adb.search=" + URLEncoder.encode(query, "UTF-8"));
|
||||
|
||||
Document dom = getHtmlDocument(searchUrl);
|
||||
|
||||
List<Node> nodes = selectNodes("//TABLE[@class='animelist']//TR/TD/ancestor::TR", dom);
|
||||
|
||||
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
|
||||
List<SearchResult> results = new ArrayList<SearchResult>(nodes.size());
|
||||
|
||||
for (Node node : nodes) {
|
||||
Node titleNode = selectNode("./TD[@class='name']/A", node);
|
||||
Node link = selectNode("./TD[@class='name']/A", node);
|
||||
|
||||
String title = getTextContent(titleNode);
|
||||
String href = getAttribute("href", titleNode);
|
||||
// prefer title that is similar to the search query
|
||||
String title = selectString("./following-sibling::*[@class='match']", link);
|
||||
|
||||
// remove leading and trailing parenthesis
|
||||
title = title.replaceAll("(^\\()|(\\)$)", "");
|
||||
|
||||
if (title.isEmpty()) {
|
||||
// fallback: use main title
|
||||
title = getTextContent(link);
|
||||
}
|
||||
|
||||
// anime page
|
||||
String href = getAttribute("href", link);
|
||||
|
||||
try {
|
||||
searchResults.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
|
||||
results.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
|
||||
} catch (MalformedURLException e) {
|
||||
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href);
|
||||
}
|
||||
}
|
||||
|
||||
// we might have been redirected to the episode list page
|
||||
if (searchResults.isEmpty()) {
|
||||
// check if current page contains an episode list
|
||||
if (exists("//TABLE[@class='eplist']", dom)) {
|
||||
// get show's name from the document
|
||||
String header = selectString("id('layout-content')//H1[1]", dom);
|
||||
String name = header.replaceFirst("Anime:\\s*", "");
|
||||
|
||||
String episodeListUrl = selectString("id('layout-main')//DIV[@class='data']//A[@class='short_link']/@href", dom);
|
||||
|
||||
try {
|
||||
searchResults.add(new HyperLink(name, new URL(episodeListUrl)));
|
||||
} catch (MalformedURLException e) {
|
||||
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + episodeListUrl);
|
||||
}
|
||||
if (results.isEmpty()) {
|
||||
// get anime information from document
|
||||
String title = selectTitle(dom);
|
||||
String link = selectString("//*[@class='data']//A[@class='short_link']/@href", dom);
|
||||
|
||||
try {
|
||||
// insert single entry
|
||||
results.add(new HyperLink(title, new URL(link)));
|
||||
} catch (MalformedURLException e) {
|
||||
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + link);
|
||||
}
|
||||
}
|
||||
|
||||
return searchResults;
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
protected String selectTitle(Document animePage) {
|
||||
// prefer official english title
|
||||
String title = selectOfficialTitle(animePage, Locale.ENGLISH);
|
||||
|
||||
if (title.isEmpty()) {
|
||||
// fallback: extract name from header (e.g. "Anime: Naruto")
|
||||
title = selectString("//H1", animePage).replaceFirst("Anime:\\s*", "");;
|
||||
}
|
||||
|
||||
return title;
|
||||
}
|
||||
|
||||
|
||||
protected String selectOfficialTitle(Document animePage, Locale language) {
|
||||
// create xpath query for official title of the given language
|
||||
// e.g. //*[@class='data']//*[contains(@class, 'official') and .//*[contains(@title, 'english')]]//LABEL
|
||||
|
||||
String condition = String.format(".//*[contains(@title, '%s')]", language.getDisplayLanguage(Locale.ENGLISH).toLowerCase());
|
||||
String xpath = String.format("//*[@class='data']//*[contains(@class, 'official') and %s]//LABEL", condition);
|
||||
|
||||
return selectString(xpath, animePage);
|
||||
}
|
||||
|
||||
|
||||
|
@ -92,22 +126,23 @@ public class AnidbClient implements EpisodeListProvider {
|
|||
|
||||
Document dom = getHtmlDocument(getEpisodeListLink(searchResult).toURL());
|
||||
|
||||
// use title from anime page
|
||||
String animeTitle = selectTitle(dom);
|
||||
|
||||
List<Node> nodes = selectNodes("id('eplist')//TR/TD/SPAN/ancestor::TR", dom);
|
||||
|
||||
ArrayList<Episode> episodes = new ArrayList<Episode>(nodes.size());
|
||||
|
||||
for (Node node : nodes) {
|
||||
String number = selectString("./TD[contains(@class,'id')]/A", node);
|
||||
String title = selectString("./TD[@class='title']/LABEL/text()", node);
|
||||
List<Node> columns = getChildren("TD", node);
|
||||
|
||||
if (title.startsWith("recap")) {
|
||||
title = title.replaceFirst("recap", "");
|
||||
}
|
||||
String number = columns.get(0).getTextContent().trim();
|
||||
String title = columns.get(1).getTextContent().trim();
|
||||
|
||||
// if number does not match, episode is probably some kind of special (S1, S2, ...)
|
||||
if (number.matches("\\d+")) {
|
||||
// no seasons for anime
|
||||
episodes.add(new Episode(searchResult.getName(), null, number, title));
|
||||
episodes.add(new Episode(animeTitle, null, number, title));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,10 +2,12 @@
|
|||
package net.sourceforge.filebot.web;
|
||||
|
||||
|
||||
import static net.sourceforge.filebot.web.WebRequest.*;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -62,14 +64,25 @@ public class AnidbClientTest {
|
|||
|
||||
|
||||
@Test
|
||||
public void searchResultPageRedirect() throws Exception {
|
||||
public void searchReturnMatchingTitle() throws Exception {
|
||||
// Seikai no Senki (main title), Banner of the Stars (official english title)
|
||||
assertEquals("Banner of the Stars", anidb.search("banner of the stars").get(0).getName());
|
||||
assertEquals("Seikai no Senki", anidb.search("seikai no senki").get(0).getName());
|
||||
|
||||
// no matching title
|
||||
assertEquals("Naruto", anidb.search("naruto").get(0).getName());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void searchPageRedirect() throws Exception {
|
||||
List<SearchResult> results = anidb.search("twelve kingdoms");
|
||||
|
||||
assertEquals(1, results.size());
|
||||
|
||||
HyperLink result = (HyperLink) results.get(0);
|
||||
|
||||
assertEquals("Juuni Kokuki", result.getName());
|
||||
assertEquals("The Twelve Kingdoms", result.getName());
|
||||
assertEquals("http://anidb.net/a26", result.getURL().toString());
|
||||
}
|
||||
|
||||
|
@ -97,13 +110,29 @@ public class AnidbClientTest {
|
|||
|
||||
Episode first = list.get(0);
|
||||
|
||||
assertEquals("Juuni Kokuki", first.getSeriesName());
|
||||
assertEquals("The Twelve Kingdoms", first.getSeriesName());
|
||||
assertEquals("Shadow of the Moon, The Sea of Shadow - Chapter 1", first.getTitle());
|
||||
assertEquals("1", first.getEpisode());
|
||||
assertEquals(null, first.getSeason());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void selectTitle() throws Exception {
|
||||
// use official english title
|
||||
assertEquals("Banner of the Stars", anidb.selectTitle(getHtmlDocument(new URL("http://anidb.net/a4"))));
|
||||
|
||||
// official english title not available -> use main title
|
||||
assertEquals("Turn A Gundam", anidb.selectTitle(getHtmlDocument(new URL("http://anidb.net/a916"))));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void selectJapaneseTitle() throws Exception {
|
||||
assertEquals("十二国記", anidb.selectOfficialTitle(getHtmlDocument(twelvekingdomsSearchResult.getURL()), Locale.JAPANESE));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListLink() throws Exception {
|
||||
assertEquals(monsterSearchResult.getURL().toString(), anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());
|
||||
|
|
Loading…
Reference in New Issue