Disable namespace processing in the NekoHTML parser
This commit is contained in:
parent
319a528542
commit
3c0296d11e
@ -28,11 +28,11 @@ public class Torrent {
|
||||
|
||||
|
||||
public Torrent(File torrent) throws IOException {
|
||||
FileInputStream in = new FileInputStream(torrent);
|
||||
BufferedInputStream in = new BufferedInputStream(new FileInputStream(torrent));
|
||||
Map<?, ?> torrentMap = null;
|
||||
|
||||
try {
|
||||
torrentMap = BDecoder.decode(new BufferedInputStream(in));
|
||||
torrentMap = BDecoder.decode(in);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import net.sourceforge.filebot.resources.ResourceManager;
|
||||
import net.sourceforge.tuned.XPathUtil;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
@ -42,19 +43,19 @@ public class AnidbSearchEngine extends SearchEngine {
|
||||
|
||||
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
|
||||
|
||||
List<Node> nodes = HtmlUtil.selectNodes("//TABLE[@class='anime_list']//TR//TD//ancestor::TR", dom);
|
||||
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@class='anime_list']//TR//TD//ancestor::TR", dom);
|
||||
ArrayList<String> shows = new ArrayList<String>(nodes.size());
|
||||
|
||||
if (!nodes.isEmpty())
|
||||
for (Node node : nodes) {
|
||||
String type = HtmlUtil.selectString("./TD[2]/text()", node);
|
||||
String type = XPathUtil.selectString("./TD[2]/text()", node);
|
||||
|
||||
// we only want shows
|
||||
if (type.equalsIgnoreCase("tv series")) {
|
||||
Node titleNode = HtmlUtil.selectNode("./TD[1]/A", node);
|
||||
Node titleNode = XPathUtil.selectNode("./TD[1]/A", node);
|
||||
|
||||
String title = HtmlUtil.selectString("text()", titleNode);
|
||||
String href = HtmlUtil.selectString("@href", titleNode);
|
||||
String title = XPathUtil.selectString("text()", titleNode);
|
||||
String href = XPathUtil.selectString("@href", titleNode);
|
||||
|
||||
String file = "/perl-bin/" + href;
|
||||
|
||||
@ -70,11 +71,11 @@ public class AnidbSearchEngine extends SearchEngine {
|
||||
}
|
||||
else {
|
||||
// we might have been redirected to the episode list page directly
|
||||
List<Node> results = HtmlUtil.selectNodes("//TABLE[@class='eplist']", dom);
|
||||
List<Node> results = XPathUtil.selectNodes("//TABLE[@class='eplist']", dom);
|
||||
|
||||
if (!results.isEmpty()) {
|
||||
// get show's name from the document
|
||||
String header = HtmlUtil.selectString("//DIV[@id='layout-content']//H1[1]/text()", dom);
|
||||
String header = XPathUtil.selectString("//DIV[@id='layout-content']//H1[1]/text()", dom);
|
||||
String title = header.replaceFirst("Anime:\\s*", "");
|
||||
|
||||
cache.put(title, getSearchUrl(searchterm));
|
||||
@ -92,7 +93,7 @@ public class AnidbSearchEngine extends SearchEngine {
|
||||
|
||||
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
|
||||
|
||||
List<Node> nodes = HtmlUtil.selectNodes("//TABLE[@id='eplist']//TR/TD/SPAN/ancestor::TR", dom);
|
||||
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@id='eplist']//TR/TD/SPAN/ancestor::TR", dom);
|
||||
|
||||
LinkedList<Episode> list = new LinkedList<Episode>();
|
||||
|
||||
@ -101,8 +102,8 @@ public class AnidbSearchEngine extends SearchEngine {
|
||||
f.setGroupingUsed(false);
|
||||
|
||||
for (Node node : nodes) {
|
||||
String number = HtmlUtil.selectString("./TD[1]/A/text()", node);
|
||||
String title = HtmlUtil.selectString("./TD[2]/SPAN/text()", node);
|
||||
String number = XPathUtil.selectString("./TD[1]/A/text()", node);
|
||||
String title = XPathUtil.selectString("./TD[2]/SPAN/text()", node);
|
||||
|
||||
if (title.startsWith("recap"))
|
||||
title = title.replaceFirst("recap", "");
|
||||
|
@ -9,21 +9,17 @@ import java.io.Reader;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import net.sourceforge.tuned.XPathUtil;
|
||||
|
||||
import org.cyberneko.html.parsers.DOMParser;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
|
||||
class HtmlUtil {
|
||||
public class HtmlUtil {
|
||||
|
||||
private static Charset getCharset(String contentType) {
|
||||
if (contentType != null) {
|
||||
@ -58,34 +54,10 @@ class HtmlUtil {
|
||||
|
||||
/**
 * Parses the HTML supplied by the given reader with a NekoHTML {@code DOMParser}
 * and returns the resulting DOM document.
 *
 * @param reader character source of the HTML markup
 * @return the document produced by the parser
 * @throws SAXException declared by the underlying parser calls
 * @throws IOException declared by the underlying parser calls
 */
public static Document getHtmlDocument(Reader reader) throws SAXException, IOException {
|
||||
DOMParser parser = new DOMParser();
|
||||
// switch off the SAX "namespaces" feature before parsing
// NOTE(review): presumably so XPath queries can use unprefixed element names - confirm
parser.setFeature("http://xml.org/sax/features/namespaces", false);
|
||||
parser.parse(new InputSource(reader));
|
||||
|
||||
return parser.getDocument();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Evaluates an XPath expression against the given node and returns the
 * result as a trimmed string.
 * <p>
 * Delegates to {@code XPathUtil.selectString}, passing the prefix
 * {@code "html"} together with the namespace obtained from
 * {@code getNameSpace(node)}.
 *
 * @param xpath XPath expression to evaluate
 * @param node context node for the evaluation
 * @return the string result with leading and trailing whitespace removed
 */
public static String selectString(String xpath, Node node) {
|
||||
return XPathUtil.selectString(xpath, node, "html", getNameSpace(node)).trim();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Evaluates an XPath expression against the given node and returns the
 * matching nodes.
 * <p>
 * Delegates to {@code XPathUtil.selectNodes}, passing the prefix
 * {@code "html"} together with the namespace obtained from
 * {@code getNameSpace(node)}.
 *
 * @param xpath XPath expression to evaluate
 * @param node context node for the evaluation
 * @return the list returned by {@code XPathUtil.selectNodes}
 */
public static List<Node> selectNodes(String xpath, Node node) {
|
||||
return XPathUtil.selectNodes(xpath, node, "html", getNameSpace(node));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Evaluates an XPath expression against the given node and returns a
 * single node result.
 * <p>
 * Delegates to {@code XPathUtil.selectNode}, passing the prefix
 * {@code "html"} together with the namespace obtained from
 * {@code getNameSpace(node)}.
 *
 * @param xpath XPath expression to evaluate
 * @param node context node for the evaluation
 * @return the node returned by {@code XPathUtil.selectNode}
 */
public static Node selectNode(String xpath, Node node) {
|
||||
return XPathUtil.selectNode(xpath, node, "html", getNameSpace(node));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Determines the namespace URI to use for XPath queries on the given node.
 * <p>
 * For a {@code Document} the namespace URI of the node matched by
 * {@code "/*"} is used; for any other node its own namespace URI is returned.
 *
 * @param node node (or document) whose namespace is wanted
 * @return the value of {@code getNamespaceURI()} for the chosen node
 */
private static String getNameSpace(Node node) {
|
||||
if (node instanceof Document) {
|
||||
// select root element
// (queried without a namespace prefix/URI, hence the two null arguments)
|
||||
return XPathUtil.selectNode("/*", node, null, null).getNamespaceURI();
|
||||
}
|
||||
|
||||
return node.getNamespaceURI();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.sourceforge.filebot.resources.ResourceManager;
|
||||
import net.sourceforge.tuned.XPathUtil;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
@ -43,13 +44,13 @@ public class TVRageSearchEngine extends SearchEngine {
|
||||
|
||||
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
|
||||
|
||||
List<Node> nodes = HtmlUtil.selectNodes("//DIV[@id='search_begin']//TABLE[1]//TR/TD/A[1]", dom);
|
||||
List<Node> nodes = XPathUtil.selectNodes("//DIV[@id='search_begin']//TABLE[1]/*/TR/TD/A[1]", dom);
|
||||
|
||||
ArrayList<String> shows = new ArrayList<String>(nodes.size());
|
||||
|
||||
for (Node node : nodes) {
|
||||
String href = HtmlUtil.selectString("@href", node);
|
||||
String title = HtmlUtil.selectString("text()", node);
|
||||
String href = XPathUtil.selectString("@href", node);
|
||||
String title = XPathUtil.selectString("text()", node);
|
||||
|
||||
try {
|
||||
URL url = new URL(href);
|
||||
@ -69,18 +70,18 @@ public class TVRageSearchEngine extends SearchEngine {
|
||||
|
||||
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
|
||||
|
||||
List<Node> nodes = HtmlUtil.selectNodes("//TABLE[@class='b']//TR[@id='brow']", dom);
|
||||
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@class='b']//TR[@id='brow']", dom);
|
||||
|
||||
ArrayList<Episode> episodes = new ArrayList<Episode>();
|
||||
|
||||
for (Node node : nodes) {
|
||||
String seasonAndEpisodeNumber = HtmlUtil.selectString("./TD[2]/A/text()", node);
|
||||
String title = HtmlUtil.selectString("./TD[4]/A/text()", node);
|
||||
String seasonAndEpisodeNumber = XPathUtil.selectString("./TD[2]/A/text()", node);
|
||||
String title = XPathUtil.selectString("./TD[4]/A/text()", node);
|
||||
|
||||
List<Node> precedings = HtmlUtil.selectNodes("../preceding-sibling::TABLE", node);
|
||||
List<Node> precedings = XPathUtil.selectNodes("../preceding-sibling::TABLE", node);
|
||||
Node previousTable = precedings.get(precedings.size() - 1);
|
||||
|
||||
String seasonHeader = HtmlUtil.selectString("./TR/TD/FONT/text()", previousTable);
|
||||
String seasonHeader = XPathUtil.selectString("./TR/TD/FONT/text()", previousTable);
|
||||
|
||||
Matcher seasonMatcher = Pattern.compile("Season (\\d+)").matcher(seasonHeader);
|
||||
|
||||
|
@ -16,6 +16,7 @@ import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import net.sourceforge.filebot.resources.ResourceManager;
|
||||
import net.sourceforge.tuned.XPathUtil;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
@ -42,7 +43,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
|
||||
|
||||
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
|
||||
|
||||
List<Node> nodes = HtmlUtil.selectNodes("//html:TABLE[@id='search-results']//html:SPAN/html:A", dom);
|
||||
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@id='search-results']//SPAN/A", dom);
|
||||
|
||||
ArrayList<String> shows = new ArrayList<String>(nodes.size());
|
||||
|
||||
@ -52,7 +53,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
|
||||
// we only want search results that are shows
|
||||
if (category.toLowerCase().startsWith("show")) {
|
||||
String title = node.getTextContent();
|
||||
String href = HtmlUtil.selectString("@href", node);
|
||||
String href = XPathUtil.selectString("@href", node);
|
||||
|
||||
try {
|
||||
URL url = new URL(href);
|
||||
@ -74,7 +75,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
|
||||
|
||||
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
|
||||
|
||||
List<Node> nodes = HtmlUtil.selectNodes("//html:DIV[@id='episode-listing']/html:DIV/html:TABLE/html:TR/html:TD/ancestor::html:TR", dom);
|
||||
List<Node> nodes = XPathUtil.selectNodes("//DIV[@id='episode-listing']/DIV/TABLE/TR/TD/ancestor::TR", dom);
|
||||
|
||||
String seasonString = null;
|
||||
|
||||
@ -93,8 +94,8 @@ public class TvdotcomSearchEngine extends SearchEngine {
|
||||
episodeOffset = 0;
|
||||
|
||||
for (Node node : nodes) {
|
||||
String episodeNumber = HtmlUtil.selectString("./html:TD[1]/text()", node);
|
||||
String title = HtmlUtil.selectString("./html:TD[2]/html:A/text()", node);
|
||||
String episodeNumber = XPathUtil.selectString("./TD[1]/text()", node);
|
||||
String title = XPathUtil.selectString("./TD[2]/A/text()", node);
|
||||
|
||||
try {
|
||||
// format number of episode
|
||||
@ -105,7 +106,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
|
||||
|
||||
episodeNumber = numberFormat.format(n - episodeOffset);
|
||||
} catch (NumberFormatException e) {
|
||||
// episode number can be "Pilot" or "Special"
|
||||
// episode number may be "Pilot", "Special", etc.
|
||||
}
|
||||
|
||||
episodes.add(new Episode(showname, seasonString, episodeNumber, title));
|
||||
|
@ -3,11 +3,8 @@ package net.sourceforge.tuned;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.XMLConstants;
|
||||
import javax.xml.namespace.NamespaceContext;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
@ -18,9 +15,9 @@ import org.w3c.dom.NodeList;
|
||||
|
||||
public class XPathUtil {
|
||||
|
||||
public static Node selectNode(String xpath, Object node, String namespacePrefix, String namespace) {
|
||||
public static Node selectNode(String xpath, Object node) {
|
||||
try {
|
||||
XPath xp = createXPath(namespacePrefix, namespace);
|
||||
XPath xp = XPathFactory.newInstance().newXPath();
|
||||
|
||||
return (Node) xp.evaluate(xpath, node, XPathConstants.NODE);
|
||||
} catch (Exception e) {
|
||||
@ -29,9 +26,9 @@ public class XPathUtil {
|
||||
}
|
||||
|
||||
|
||||
public static List<Node> selectNodes(String xpath, Object node, String namespacePrefix, String namespace) {
|
||||
public static List<Node> selectNodes(String xpath, Object node) {
|
||||
try {
|
||||
XPath xp = createXPath(namespacePrefix, namespace);
|
||||
XPath xp = XPathFactory.newInstance().newXPath();
|
||||
|
||||
NodeList nodeList = (NodeList) xp.evaluate(xpath, node, XPathConstants.NODESET);
|
||||
|
||||
@ -48,69 +45,13 @@ public class XPathUtil {
|
||||
}
|
||||
|
||||
|
||||
public static String selectString(String xpath, Object node, String namespacePrefix, String namespace) {
|
||||
public static String selectString(String xpath, Object node) {
|
||||
try {
|
||||
XPath xp = createXPath(namespacePrefix, namespace);
|
||||
return (String) xp.evaluate(xpath, node, XPathConstants.STRING);
|
||||
XPath xp = XPathFactory.newInstance().newXPath();
|
||||
return ((String) xp.evaluate(xpath, node, XPathConstants.STRING)).trim();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a new {@code XPath} instance, optionally bound to a single
 * namespace prefix.
 *
 * @param namespacePrefix prefix to bind, or {@code null} for none
 * @param namespace namespace URI to bind the prefix to, or {@code null} for none
 * @return a freshly created {@code XPath} object
 */
private static XPath createXPath(String namespacePrefix, String namespace) {
|
||||
XPath xp = XPathFactory.newInstance().newXPath();
|
||||
|
||||
// a namespace context is installed only when both prefix and URI are given
if (namespacePrefix != null && namespace != null) {
|
||||
xp.setNamespaceContext(new NamespaceContextProvider(namespacePrefix, namespace));
|
||||
}
|
||||
|
||||
return xp;
|
||||
}
|
||||
|
||||
|
||||
/**
 * {@code NamespaceContext} implementation that knows exactly one
 * user-supplied prefix/URI binding in addition to the {@code xml} and
 * {@code xmlns} constants from {@code XMLConstants}.
 */
private static class NamespaceContextProvider implements NamespaceContext {
|
||||
|
||||
// the single prefix this context resolves
String boundPrefix;
|
||||
// the namespace URI associated with boundPrefix
String boundURI;
|
||||
|
||||
|
||||
/**
 * Stores the given prefix/URI pair as the only custom binding.
 *
 * @param prefix prefix to bind
 * @param URI namespace URI bound to the prefix
 */
NamespaceContextProvider(String prefix, String URI) {
|
||||
boundPrefix = prefix;
|
||||
boundURI = URI;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Resolves a prefix to its namespace URI.
 *
 * @param prefix prefix to look up
 * @return the bound URI for the bound prefix, the XML / XMLNS constants for
 *         their reserved prefixes, otherwise {@code XMLConstants.NULL_NS_URI}
 */
// NOTE(review): a null prefix would raise NullPointerException from equals()
public String getNamespaceURI(String prefix) {
|
||||
if (prefix.equals(boundPrefix)) {
|
||||
return boundURI;
|
||||
} else if (prefix.equals(XMLConstants.XML_NS_PREFIX)) {
|
||||
return XMLConstants.XML_NS_URI;
|
||||
} else if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) {
|
||||
return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
|
||||
} else {
|
||||
return XMLConstants.NULL_NS_URI;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Resolves a namespace URI back to its prefix.
 *
 * @param namespaceURI namespace URI to look up
 * @return the bound prefix for the bound URI, the reserved prefixes for the
 *         XML / XMLNS URIs, otherwise {@code null}
 */
// NOTE(review): a null namespaceURI would raise NullPointerException from equals()
public String getPrefix(String namespaceURI) {
|
||||
if (namespaceURI.equals(boundURI)) {
|
||||
return boundPrefix;
|
||||
} else if (namespaceURI.equals(XMLConstants.XML_NS_URI)) {
|
||||
return XMLConstants.XML_NS_PREFIX;
|
||||
} else if (namespaceURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
|
||||
return XMLConstants.XMLNS_ATTRIBUTE;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Prefix enumeration is not implemented; this always returns {@code null}.
 *
 * @param namespaceURI ignored
 * @return always {@code null}
 */
// NOTE(review): callers expecting an Iterator here would fail on the null result
@SuppressWarnings("unchecked")
|
||||
public Iterator getPrefixes(String namespaceURI) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user