From ec4254e6874fec7ff06de888666633593078c242 Mon Sep 17 00:00:00 2001
From: Reinhard Pointner
Date: Mon, 25 May 2009 20:13:30 +0000
Subject: [PATCH] * use "matching title" in anidb search results * use
 official english anime title * much faster episode information extraction
 (less xpath)

---
Review notes (this area is ignored when the patch is applied):
- AnidbClient.selectTitle: removed the stray empty statement (";;").
- AnidbClient.selectOfficialTitle: the language name is lower-cased with an
  explicit locale so the generated XPath does not depend on the JVM default
  locale.
- Both changes replace added lines in place, so hunk line counts are
  unchanged; the "index" blob ids below still refer to the original
  post-image.

 .../filebot/similarity/SeriesNameMatcher.java |  9 +-
 .../sourceforge/filebot/web/AnidbClient.java  | 89 +++++++++++++------
 .../filebot/web/AnidbClientTest.java          | 35 +++++++-
 3 files changed, 100 insertions(+), 33 deletions(-)

diff --git a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
index 5d1d8342..c1c4598f 100644
--- a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
+++ b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
@@ -281,11 +281,14 @@ public class SeriesNameMatcher {
 	}
 
 
-	protected String[] names(List<File> files) {
+	protected String[] names(Collection<File> files) {
 		String[] names = new String[files.size()];
 
-		for (int i = 0; i < names.length; i++) {
-			names[i] = FileUtilities.getName(files.get(i));
+		int i = 0;
+
+		// fill array
+		for (File file : files) {
+			names[i++] = FileUtilities.getName(file);
 		}
 
 		return names;
diff --git a/source/net/sourceforge/filebot/web/AnidbClient.java b/source/net/sourceforge/filebot/web/AnidbClient.java
index af719524..e0623e52 100644
--- a/source/net/sourceforge/filebot/web/AnidbClient.java
+++ b/source/net/sourceforge/filebot/web/AnidbClient.java
@@ -12,6 +12,7 @@ import java.net.URL;
 import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -43,47 +44,80 @@ public class AnidbClient implements EpisodeListProvider {
 
 	@Override
 	public List<SearchResult> search(String query) throws IOException, SAXException {
-
+		// Air Status: ignore
+		// Anime Type: TV Series, TV Special, OVA
+		// Hide Synonyms: true
 		URL searchUrl = new URL("http", host, "/perl-bin/animedb.pl?type.tvspecial=1&type.tvseries=1&type.ova=1&show=animelist&orderby.name=0.1&noalias=1&do.update=update&adb.search=" + URLEncoder.encode(query, "UTF-8"));
 
 		Document dom = getHtmlDocument(searchUrl);
 
 		List<Node> nodes = selectNodes("//TABLE[@class='animelist']//TR/TD/ancestor::TR", dom);
 
-		List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
+		List<SearchResult> results = new ArrayList<SearchResult>(nodes.size());
 
 		for (Node node : nodes) {
-			Node titleNode = selectNode("./TD[@class='name']/A", node);
+			Node link = selectNode("./TD[@class='name']/A", node);
 
-			String title = getTextContent(titleNode);
-			String href = getAttribute("href", titleNode);
+			// prefer title that is similar to the search query
+			String title = selectString("./following-sibling::*[@class='match']", link);
+
+			// remove leading and trailing parenthesis
+			title = title.replaceAll("(^\\()|(\\)$)", "");
+
+			if (title.isEmpty()) {
+				// fallback: use main title
+				title = getTextContent(link);
+			}
+
+			// anime page
+			String href = getAttribute("href", link);
 
 			try {
-				searchResults.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
+				results.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
 			} catch (MalformedURLException e) {
 				Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href);
 			}
 		}
 
 		// we might have been redirected to the episode list page
-		if (searchResults.isEmpty()) {
-			// check if current page contains an episode list
-			if (exists("//TABLE[@class='eplist']", dom)) {
-				// get show's name from the document
-				String header = selectString("id('layout-content')//H1[1]", dom);
-				String name = header.replaceFirst("Anime:\\s*", "");
-
-				String episodeListUrl = selectString("id('layout-main')//DIV[@class='data']//A[@class='short_link']/@href", dom);
-
-				try {
-					searchResults.add(new HyperLink(name, new URL(episodeListUrl)));
-				} catch (MalformedURLException e) {
-					Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + episodeListUrl);
-				}
+		if (results.isEmpty()) {
+			// get anime information from document
+			String title = selectTitle(dom);
+			String link = selectString("//*[@class='data']//A[@class='short_link']/@href", dom);
+
+			try {
+				// insert single entry
+				results.add(new HyperLink(title, new URL(link)));
+			} catch (MalformedURLException e) {
+				Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + link);
 			}
 		}
 
-		return searchResults;
+		return results;
+	}
+
+
+	protected String selectTitle(Document animePage) {
+		// prefer official english title
+		String title = selectOfficialTitle(animePage, Locale.ENGLISH);
+
+		if (title.isEmpty()) {
+			// fallback: extract name from header (e.g. "Anime: Naruto")
+			title = selectString("//H1", animePage).replaceFirst("Anime:\\s*", "");
+		}
+
+		return title;
+	}
+
+
+	protected String selectOfficialTitle(Document animePage, Locale language) {
+		// create xpath query for official title of the given language
+		// e.g. //*[@class='data']//*[contains(@class, 'official') and .//*[contains(@title, 'english')]]//LABEL
+
+		String condition = String.format(".//*[contains(@title, '%s')]", language.getDisplayLanguage(Locale.ENGLISH).toLowerCase(Locale.ENGLISH));
+		String xpath = String.format("//*[@class='data']//*[contains(@class, 'official') and %s]//LABEL", condition);
+
+		return selectString(xpath, animePage);
 	}
 
 
@@ -92,22 +126,23 @@ public class AnidbClient implements EpisodeListProvider {
 
 		Document dom = getHtmlDocument(getEpisodeListLink(searchResult).toURL());
 
+		// use title from anime page
+		String animeTitle = selectTitle(dom);
+
 		List<Node> nodes = selectNodes("id('eplist')//TR/TD/SPAN/ancestor::TR", dom);
 
 		ArrayList<Episode> episodes = new ArrayList<Episode>(nodes.size());
 
 		for (Node node : nodes) {
-			String number = selectString("./TD[contains(@class,'id')]/A", node);
-			String title = selectString("./TD[@class='title']/LABEL/text()", node);
+			List<Node> columns = getChildren("TD", node);
 
-			if (title.startsWith("recap")) {
-				title = title.replaceFirst("recap", "");
-			}
+			String number = columns.get(0).getTextContent().trim();
+			String title = columns.get(1).getTextContent().trim();
 
 			// if number does not match, episode is probably some kind of special (S1, S2, ...)
 			if (number.matches("\\d+")) {
 				// no seasons for anime
-				episodes.add(new Episode(searchResult.getName(), null, number, title));
+				episodes.add(new Episode(animeTitle, null, number, title));
 			}
 		}
 
diff --git a/test/net/sourceforge/filebot/web/AnidbClientTest.java b/test/net/sourceforge/filebot/web/AnidbClientTest.java
index f6853c5f..ef08d6c3 100644
--- a/test/net/sourceforge/filebot/web/AnidbClientTest.java
+++ b/test/net/sourceforge/filebot/web/AnidbClientTest.java
@@ -2,10 +2,12 @@ package net.sourceforge.filebot.web;
 
 
 
+import static net.sourceforge.filebot.web.WebRequest.*;
 import static org.junit.Assert.*;
 
 import java.net.URL;
 import java.util.List;
+import java.util.Locale;
 
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -62,14 +64,25 @@ public class AnidbClientTest {
 
 
 	@Test
-	public void searchResultPageRedirect() throws Exception {
+	public void searchReturnMatchingTitle() throws Exception {
+		// Seikai no Senki (main title), Banner of the Stars (official english title)
+		assertEquals("Banner of the Stars", anidb.search("banner of the stars").get(0).getName());
+		assertEquals("Seikai no Senki", anidb.search("seikai no senki").get(0).getName());
+
+		// no matching title
+		assertEquals("Naruto", anidb.search("naruto").get(0).getName());
+	}
+
+
+	@Test
+	public void searchPageRedirect() throws Exception {
 		List<SearchResult> results = anidb.search("twelve kingdoms");
 
 		assertEquals(1, results.size());
 
 		HyperLink result = (HyperLink) results.get(0);
 
-		assertEquals("Juuni Kokuki", result.getName());
+		assertEquals("The Twelve Kingdoms", result.getName());
 		assertEquals("http://anidb.net/a26", result.getURL().toString());
 	}
 
@@ -97,13 +110,29 @@ public class AnidbClientTest {
 
 		Episode first = list.get(0);
 
-		assertEquals("Juuni Kokuki", first.getSeriesName());
+		assertEquals("The Twelve Kingdoms", first.getSeriesName());
 		assertEquals("Shadow of the Moon, The Sea of Shadow - Chapter 1", first.getTitle());
 		assertEquals("1", first.getEpisode());
 		assertEquals(null, first.getSeason());
 	}
 
 
+	@Test
+	public void selectTitle() throws Exception {
+		// use official english title
+		assertEquals("Banner of the Stars", anidb.selectTitle(getHtmlDocument(new URL("http://anidb.net/a4"))));
+
+		// official english title not available -> use main title
+		assertEquals("Turn A Gundam", anidb.selectTitle(getHtmlDocument(new URL("http://anidb.net/a916"))));
+	}
+
+
+	@Test
+	public void selectJapaneseTitle() throws Exception {
+		assertEquals("十二国記", anidb.selectOfficialTitle(getHtmlDocument(twelvekingdomsSearchResult.getURL()), Locale.JAPANESE));
+	}
+
+
 	@Test
 	public void getEpisodeListLink() throws Exception {
 		assertEquals(monsterSearchResult.getURL().toString(), anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());