* more robust parsing of xml data

This commit is contained in:
Reinhard Pointner 2014-12-23 06:38:29 +00:00
parent bfa53d60d7
commit 0c741cc9cd
4 changed files with 47 additions and 66 deletions

View File

@ -3,10 +3,11 @@ package net.filebot.util;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Scanner;
import javax.xml.namespace.QName;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
@ -16,40 +17,24 @@ import org.w3c.dom.NodeList;
public final class XPathUtilities {
/**
 * Evaluate an XPath expression against the given context item and return the result as a single node.
 *
 * @param xpath
 *            XPath expression to evaluate
 * @param node
 *            context item the expression is evaluated against
 * @return the evaluation result requested as {@link XPathConstants#NODE}, cast to {@link Node}
 * @throws IllegalArgumentException
 *             if {@code evaluateXPath} rejects the expression
 */
public static Node selectNode(String xpath, Object node) {
    // single evaluation path; a second return after a try/catch that always returns or throws is unreachable
    return (Node) evaluateXPath(xpath, node, XPathConstants.NODE);
}
/**
 * Evaluate an XPath expression against the given context item and return all matching nodes.
 *
 * @param xpath
 *            XPath expression to evaluate
 * @param node
 *            context item the expression is evaluated against
 * @return the {@link NodeList} produced for {@link XPathConstants#NODESET}, wrapped in a {@code NodeListDecorator}
 * @throws IllegalArgumentException
 *             if {@code evaluateXPath} rejects the expression
 */
public static List<Node> selectNodes(String xpath, Object node) {
    // single evaluation path; a second return after a try/catch that always returns or throws is unreachable
    NodeList matches = (NodeList) evaluateXPath(xpath, node, XPathConstants.NODESET);
    return new NodeListDecorator(matches);
}
/**
 * Evaluate an XPath expression against the given context item and return the trimmed string result.
 *
 * @param xpath
 *            XPath expression to evaluate
 * @param node
 *            context item the expression is evaluated against
 * @return the evaluation result requested as {@link XPathConstants#STRING}, with leading and trailing whitespace removed
 * @throws IllegalArgumentException
 *             if {@code evaluateXPath} rejects the expression
 */
public static String selectString(String xpath, Object node) {
    // single evaluation path; a second return after a try/catch that always returns or throws is unreachable
    String value = (String) evaluateXPath(xpath, node, XPathConstants.STRING);
    return value.trim();
}
public static List<String> selectStrings(String xpath, Object node) {
List<String> values = new ArrayList<String>();
try {
for (Node it : selectNodes(xpath, node)) {
String textContent = getTextContent(it);
if (textContent.length() > 0) {
values.add(textContent);
}
for (Node it : selectNodes(xpath, node)) {
String textContent = getTextContent(it);
if (textContent.length() > 0) {
values.add(textContent);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
return values;
}
@ -90,14 +75,6 @@ public final class XPathUtilities {
return null;
}
/**
 * Read an attribute via {@code getAttribute} and parse the first run of digits in its value.
 *
 * @param attribute
 *            attribute name passed to {@code getAttribute}
 * @param node
 *            node passed to {@code getAttribute}
 * @return the first integer found when the value is split on non-digit characters, or {@code null} if any
 *         exception occurs while reading or parsing (best-effort lookup)
 */
public static Integer getIntegerAttribute(String attribute, Node node) {
    Integer parsed;
    try {
        // "\\D+" treats every run of non-digits as a separator, so only digit runs are tokens
        Scanner digits = new Scanner(getAttribute(attribute, node));
        digits.useDelimiter("\\D+");
        parsed = digits.nextInt();
    } catch (Exception e) {
        parsed = null;
    }
    return parsed;
}
/**
* Get text content of the first child node matching the given node name. Use this method instead of {@link #selectString(String, Object)} whenever xpath support is not required, because it is much faster, especially for large documents.
*
@ -127,22 +104,6 @@ public final class XPathUtilities {
return sb.toString().trim();
}
/**
 * Read a child's text via {@code getTextContent(childName, parentNode)} and parse the first run of digits in it.
 *
 * @param childName
 *            child name passed to {@code getTextContent}
 * @param parentNode
 *            parent node passed to {@code getTextContent}
 * @return the first integer found when the text is split on non-digit characters, or {@code null} if any
 *         exception occurs while reading or parsing (best-effort lookup)
 */
public static Integer getIntegerContent(String childName, Node parentNode) {
    Integer parsed;
    try {
        // "\\D+" treats every run of non-digits as a separator, so only digit runs are tokens
        Scanner digits = new Scanner(getTextContent(childName, parentNode));
        digits.useDelimiter("\\D+");
        parsed = digits.nextInt();
    } catch (Exception e) {
        parsed = null;
    }
    return parsed;
}
/**
 * Read a child's text via {@code getTextContent(childName, parentNode)} and parse it as a decimal number.
 *
 * @param childName
 *            child name passed to {@code getTextContent}
 * @param parentNode
 *            parent node passed to {@code getTextContent}
 * @return the parsed value, or {@code null} if any exception occurs while reading or parsing (best-effort lookup)
 */
public static Double getDecimalContent(String childName, Node parentNode) {
    try {
        // Double.valueOf parses exactly like the deprecated Double(String) constructor
        return Double.valueOf(getTextContent(childName, parentNode));
    } catch (Exception e) {
        return null;
    }
}
public static List<String> getListContent(String childName, String delimiter, Node parentNode) {
List<String> list = new ArrayList<String>();
for (Node node : getChildren(childName, parentNode)) {
@ -163,8 +124,28 @@ public final class XPathUtilities {
return list;
}
private static XPathExpression getXPath(String xpath) throws XPathExpressionException {
return XPathFactory.newInstance().newXPath().compile(xpath);
/**
 * Parse the first run of digits found in the given text.
 *
 * The text is split on runs of non-digit characters, so surrounding text such as units is ignored
 * (and a leading minus sign is treated as a separator, not as a sign).
 *
 * @param textContent
 *            text to scan, may be {@code null}
 * @return the first integer in the text, or {@code null} if the text is {@code null}, contains no digits,
 *         or the first digit run does not fit into an {@code int}
 */
public static Integer getInteger(String textContent) {
    // explicit guard instead of relying on a caught NullPointerException
    if (textContent == null) {
        return null;
    }

    try (Scanner scanner = new Scanner(textContent)) {
        return scanner.useDelimiter("\\D+").nextInt();
    } catch (NumberFormatException | NoSuchElementException e) {
        // no digit token at all, or a token outside the int range (InputMismatchException is a NoSuchElementException)
        return null;
    }
}
/**
 * Parse the given text as a decimal number.
 *
 * @param textContent
 *            text to parse, may be {@code null}
 * @return the parsed value, or {@code null} if the text is {@code null} or is not a valid floating-point literal
 */
public static Double getDecimal(String textContent) {
    // explicit guard instead of relying on a caught NullPointerException
    if (textContent == null) {
        return null;
    }

    try {
        // Double.valueOf parses exactly like the deprecated Double(String) constructor
        return Double.valueOf(textContent);
    } catch (NumberFormatException e) {
        return null;
    }
}
/**
 * Compile the given XPath expression with a freshly created {@link XPathFactory} and evaluate it.
 *
 * @param xpath
 *            XPath expression to compile
 * @param item
 *            context item the compiled expression is evaluated against
 * @param returnType
 *            requested result type, e.g. one of the {@code XPathConstants} values
 * @return the evaluation result; the caller casts it according to {@code returnType}
 * @throws IllegalArgumentException
 *             wrapping the {@link XPathExpressionException} raised while compiling or evaluating
 */
public static Object evaluateXPath(String xpath, Object item, QName returnType) {
    Object result;
    try {
        XPathExpression expression = XPathFactory.newInstance().newXPath().compile(xpath);
        result = expression.evaluate(item, returnType);
    } catch (XPathExpressionException e) {
        throw new IllegalArgumentException(e);
    }
    return result;
}
/**

View File

@ -122,8 +122,8 @@ public class AnidbClient extends AbstractEpisodeListProvider {
}
seriesInfo.setName(selectString("anime/titles/title[@type='main']", dom));
seriesInfo.setRating(new Double(selectString("anime/ratings/permanent", dom)));
seriesInfo.setRatingCount(new Integer(selectString("anime/ratings/permanent/@count", dom)));
seriesInfo.setRating(getDecimal(selectString("anime/ratings/permanent", dom)));
seriesInfo.setRatingCount(getInteger(getTextContent("anime/ratings/permanent/@count", dom)));
seriesInfo.setStartDate(SimpleDate.parse(selectString("anime/startdate", dom), "yyyy-MM-dd"));
// add categories ordered by weight as genres
@ -132,7 +132,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
// * limit to 5 genres
seriesInfo.setGenres(selectNodes("anime/categories/category", dom).stream().map(categoryNode -> {
String name = getTextContent("name", categoryNode);
Integer weight = getIntegerAttribute("weight", categoryNode);
Integer weight = getInteger(getAttribute("weight", categoryNode));
return new SimpleImmutableEntry<String, Integer>(name, weight);
}).filter(nw -> {
return nw.getKey() != null && nw.getValue() != null && nw.getKey().length() > 0 && nw.getValue() >= 400;
@ -174,7 +174,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
// sanity check
if (episodes.isEmpty()) {
// anime page xml doesn't work sometimes
Logger.getLogger(AnidbClient.class.getName()).log(Level.WARNING, String.format("Unable to parse episode data: %s (%d) => %s", anime, anime.getAnimeId(), getXmlString(dom, false)));
Logger.getLogger(AnidbClient.class.getName()).log(Level.WARNING, String.format("Unable to parse episode data: %s (%d): %s", anime, anime.getAnimeId(), getXmlString(dom, false).split("\n", 2)[0].trim()));
}
return new SeriesData(seriesInfo, episodes);

View File

@ -93,7 +93,7 @@ public class TVRageClient extends AbstractEpisodeListProvider {
seriesInfo.setName(getTextContent("name", seriesNode));
seriesInfo.setNetwork(getTextContent("network", seriesNode));
seriesInfo.setRuntime(getIntegerContent("runtime", seriesNode));
seriesInfo.setRuntime(getInteger(getTextContent("runtime", seriesNode)));
seriesInfo.setStatus(getTextContent("status", seriesNode));
seriesInfo.setGenres(getListContent("genre", null, getChild("genres", seriesNode)));
@ -106,7 +106,7 @@ public class TVRageClient extends AbstractEpisodeListProvider {
// episodes and specials
for (Node node : selectNodes("//episode", dom)) {
String title = getTextContent("title", node);
Integer episodeNumber = getIntegerContent("seasonnum", node);
Integer episodeNumber = getInteger(getTextContent("seasonnum", node));
String seasonIdentifier = getAttribute("no", node.getParentNode());
Integer seasonNumber = seasonIdentifier == null ? null : new Integer(seasonIdentifier);
SimpleDate airdate = SimpleDate.parse(getTextContent("airdate", node), "yyyy-MM-dd");
@ -114,13 +114,13 @@ public class TVRageClient extends AbstractEpisodeListProvider {
// check if we have season and episode number, if not it must be a special episode
if (episodeNumber == null || seasonNumber == null) {
// handle as special episode
seasonNumber = getIntegerContent("season", node);
seasonNumber = getInteger(getTextContent("season", node));
int specialNumber = filterBySeason(specials, seasonNumber).size() + 1;
specials.add(new Episode(seriesInfo.getName(), seasonNumber, null, title, null, specialNumber, airdate, new SeriesInfo(seriesInfo)));
} else {
// handle as normal episode
if (sortOrder == SortOrder.Absolute) {
episodeNumber = getIntegerContent("epnum", node);
episodeNumber = getInteger(getTextContent("epnum", node));
seasonNumber = null;
}
episodes.add(new Episode(seriesInfo.getName(), seasonNumber, episodeNumber, title, null, null, airdate, new SeriesInfo(seriesInfo)));

View File

@ -103,7 +103,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
Map<Integer, TheTVDBSearchResult> resultSet = new LinkedHashMap<Integer, TheTVDBSearchResult>();
for (Node node : nodes) {
int sid = getIntegerContent("seriesid", node);
int sid = getInteger(getTextContent("seriesid", node));
String seriesName = getTextContent("SeriesName", node);
List<String> aliasNames = new ArrayList<String>();
@ -142,9 +142,9 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
seriesInfo.setOverview(getTextContent("Overview", seriesNode));
seriesInfo.setStatus(getTextContent("Status", seriesNode));
seriesInfo.setRating(getDecimalContent("Rating", seriesNode));
seriesInfo.setRatingCount(getIntegerContent("RatingCount", seriesNode));
seriesInfo.setRuntime(getIntegerContent("Runtime", seriesNode));
seriesInfo.setRating(getDecimal(getTextContent("Rating", seriesNode)));
seriesInfo.setRatingCount(getInteger(getTextContent("RatingCount", seriesNode)));
seriesInfo.setRuntime(getInteger(getTextContent("Runtime", seriesNode)));
seriesInfo.setActors(getListContent("Actors", "\\|", seriesNode));
seriesInfo.setGenres(getListContent("Genre", "\\|", seriesNode));
seriesInfo.setStartDate(SimpleDate.parse(getTextContent("FirstAired", seriesNode), "yyyy-MM-dd"));
@ -163,16 +163,16 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
String episodeName = getTextContent("EpisodeName", node);
String dvdSeasonNumber = getTextContent("DVD_season", node);
String dvdEpisodeNumber = getTextContent("DVD_episodenumber", node);
Integer absoluteNumber = getIntegerContent("absolute_number", node);
Integer absoluteNumber = getInteger(getTextContent("absolute_number", node));
SimpleDate airdate = SimpleDate.parse(getTextContent("FirstAired", node), "yyyy-MM-dd");
// default numbering
Integer episodeNumber = getIntegerContent("EpisodeNumber", node);
Integer seasonNumber = getIntegerContent("SeasonNumber", node);
Integer episodeNumber = getInteger(getTextContent("EpisodeNumber", node));
Integer seasonNumber = getInteger(getTextContent("SeasonNumber", node));
if (seasonNumber == null || seasonNumber == 0) {
// handle as special episode
Integer airsBefore = getIntegerContent("airsbefore_season", node);
Integer airsBefore = getInteger(getTextContent("airsbefore_season", node));
if (airsBefore != null) {
seasonNumber = airsBefore;
}