mirror of
https://github.com/mitb-archive/filebot
synced 2024-11-04 08:25:03 -05:00
* more robust parsing of xml data
This commit is contained in:
parent
bfa53d60d7
commit
0c741cc9cd
@ -3,10 +3,11 @@ package net.filebot.util;
|
||||
import java.util.AbstractList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Scanner;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpression;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
@ -16,40 +17,24 @@ import org.w3c.dom.NodeList;
|
||||
public final class XPathUtilities {
|
||||
|
||||
public static Node selectNode(String xpath, Object node) {
|
||||
try {
|
||||
return (Node) getXPath(xpath).evaluate(node, XPathConstants.NODE);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return (Node) evaluateXPath(xpath, node, XPathConstants.NODE);
|
||||
}
|
||||
|
||||
public static List<Node> selectNodes(String xpath, Object node) {
|
||||
try {
|
||||
return new NodeListDecorator((NodeList) getXPath(xpath).evaluate(node, XPathConstants.NODESET));
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return new NodeListDecorator((NodeList) evaluateXPath(xpath, node, XPathConstants.NODESET));
|
||||
}
|
||||
|
||||
public static String selectString(String xpath, Object node) {
|
||||
try {
|
||||
return ((String) getXPath(xpath).evaluate(node, XPathConstants.STRING)).trim();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return ((String) evaluateXPath(xpath, node, XPathConstants.STRING)).trim();
|
||||
}
|
||||
|
||||
public static List<String> selectStrings(String xpath, Object node) {
|
||||
List<String> values = new ArrayList<String>();
|
||||
try {
|
||||
for (Node it : selectNodes(xpath, node)) {
|
||||
String textContent = getTextContent(it);
|
||||
if (textContent.length() > 0) {
|
||||
values.add(textContent);
|
||||
}
|
||||
for (Node it : selectNodes(xpath, node)) {
|
||||
String textContent = getTextContent(it);
|
||||
if (textContent.length() > 0) {
|
||||
values.add(textContent);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return values;
|
||||
}
|
||||
@ -90,14 +75,6 @@ public final class XPathUtilities {
|
||||
return null;
|
||||
}
|
||||
|
||||
public static Integer getIntegerAttribute(String attribute, Node node) {
|
||||
try {
|
||||
return new Scanner(getAttribute(attribute, node)).useDelimiter("\\D+").nextInt();
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get text content of the first child node matching the given node name. Use this method instead of {@link #selectString(String, Object)} whenever xpath support is not required, because it is much faster, especially for large documents.
|
||||
*
|
||||
@ -127,22 +104,6 @@ public final class XPathUtilities {
|
||||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
public static Integer getIntegerContent(String childName, Node parentNode) {
|
||||
try {
|
||||
return new Scanner(getTextContent(childName, parentNode)).useDelimiter("\\D+").nextInt();
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static Double getDecimalContent(String childName, Node parentNode) {
|
||||
try {
|
||||
return new Double(getTextContent(childName, parentNode));
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> getListContent(String childName, String delimiter, Node parentNode) {
|
||||
List<String> list = new ArrayList<String>();
|
||||
for (Node node : getChildren(childName, parentNode)) {
|
||||
@ -163,8 +124,28 @@ public final class XPathUtilities {
|
||||
return list;
|
||||
}
|
||||
|
||||
private static XPathExpression getXPath(String xpath) throws XPathExpressionException {
|
||||
return XPathFactory.newInstance().newXPath().compile(xpath);
|
||||
public static Integer getInteger(String textContent) {
|
||||
try {
|
||||
return new Scanner(textContent).useDelimiter("\\D+").nextInt();
|
||||
} catch (NumberFormatException | NoSuchElementException | NullPointerException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static Double getDecimal(String textContent) {
|
||||
try {
|
||||
return new Double(textContent);
|
||||
} catch (NumberFormatException | NullPointerException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static Object evaluateXPath(String xpath, Object item, QName returnType) {
|
||||
try {
|
||||
return XPathFactory.newInstance().newXPath().compile(xpath).evaluate(item, returnType);
|
||||
} catch (XPathExpressionException e) {
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -122,8 +122,8 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
}
|
||||
|
||||
seriesInfo.setName(selectString("anime/titles/title[@type='main']", dom));
|
||||
seriesInfo.setRating(new Double(selectString("anime/ratings/permanent", dom)));
|
||||
seriesInfo.setRatingCount(new Integer(selectString("anime/ratings/permanent/@count", dom)));
|
||||
seriesInfo.setRating(getDecimal(selectString("anime/ratings/permanent", dom)));
|
||||
seriesInfo.setRatingCount(getInteger(getTextContent("anime/ratings/permanent/@count", dom)));
|
||||
seriesInfo.setStartDate(SimpleDate.parse(selectString("anime/startdate", dom), "yyyy-MM-dd"));
|
||||
|
||||
// add categories ordered by weight as genres
|
||||
@ -132,7 +132,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
// * limit to 5 genres
|
||||
seriesInfo.setGenres(selectNodes("anime/categories/category", dom).stream().map(categoryNode -> {
|
||||
String name = getTextContent("name", categoryNode);
|
||||
Integer weight = getIntegerAttribute("weight", categoryNode);
|
||||
Integer weight = getInteger(getAttribute("weight", categoryNode));
|
||||
return new SimpleImmutableEntry<String, Integer>(name, weight);
|
||||
}).filter(nw -> {
|
||||
return nw.getKey() != null && nw.getValue() != null && nw.getKey().length() > 0 && nw.getValue() >= 400;
|
||||
@ -174,7 +174,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
// sanity check
|
||||
if (episodes.isEmpty()) {
|
||||
// anime page xml doesn't work sometimes
|
||||
Logger.getLogger(AnidbClient.class.getName()).log(Level.WARNING, String.format("Unable to parse episode data: %s (%d) => %s", anime, anime.getAnimeId(), getXmlString(dom, false)));
|
||||
Logger.getLogger(AnidbClient.class.getName()).log(Level.WARNING, String.format("Unable to parse episode data: %s (%d): %s", anime, anime.getAnimeId(), getXmlString(dom, false).split("\n", 2)[0].trim()));
|
||||
}
|
||||
|
||||
return new SeriesData(seriesInfo, episodes);
|
||||
|
@ -93,7 +93,7 @@ public class TVRageClient extends AbstractEpisodeListProvider {
|
||||
|
||||
seriesInfo.setName(getTextContent("name", seriesNode));
|
||||
seriesInfo.setNetwork(getTextContent("network", seriesNode));
|
||||
seriesInfo.setRuntime(getIntegerContent("runtime", seriesNode));
|
||||
seriesInfo.setRuntime(getInteger(getTextContent("runtime", seriesNode)));
|
||||
seriesInfo.setStatus(getTextContent("status", seriesNode));
|
||||
|
||||
seriesInfo.setGenres(getListContent("genre", null, getChild("genres", seriesNode)));
|
||||
@ -106,7 +106,7 @@ public class TVRageClient extends AbstractEpisodeListProvider {
|
||||
// episodes and specials
|
||||
for (Node node : selectNodes("//episode", dom)) {
|
||||
String title = getTextContent("title", node);
|
||||
Integer episodeNumber = getIntegerContent("seasonnum", node);
|
||||
Integer episodeNumber = getInteger(getTextContent("seasonnum", node));
|
||||
String seasonIdentifier = getAttribute("no", node.getParentNode());
|
||||
Integer seasonNumber = seasonIdentifier == null ? null : new Integer(seasonIdentifier);
|
||||
SimpleDate airdate = SimpleDate.parse(getTextContent("airdate", node), "yyyy-MM-dd");
|
||||
@ -114,13 +114,13 @@ public class TVRageClient extends AbstractEpisodeListProvider {
|
||||
// check if we have season and episode number, if not it must be a special episode
|
||||
if (episodeNumber == null || seasonNumber == null) {
|
||||
// handle as special episode
|
||||
seasonNumber = getIntegerContent("season", node);
|
||||
seasonNumber = getInteger(getTextContent("season", node));
|
||||
int specialNumber = filterBySeason(specials, seasonNumber).size() + 1;
|
||||
specials.add(new Episode(seriesInfo.getName(), seasonNumber, null, title, null, specialNumber, airdate, new SeriesInfo(seriesInfo)));
|
||||
} else {
|
||||
// handle as normal episode
|
||||
if (sortOrder == SortOrder.Absolute) {
|
||||
episodeNumber = getIntegerContent("epnum", node);
|
||||
episodeNumber = getInteger(getTextContent("epnum", node));
|
||||
seasonNumber = null;
|
||||
}
|
||||
episodes.add(new Episode(seriesInfo.getName(), seasonNumber, episodeNumber, title, null, null, airdate, new SeriesInfo(seriesInfo)));
|
||||
|
@ -103,7 +103,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
|
||||
Map<Integer, TheTVDBSearchResult> resultSet = new LinkedHashMap<Integer, TheTVDBSearchResult>();
|
||||
|
||||
for (Node node : nodes) {
|
||||
int sid = getIntegerContent("seriesid", node);
|
||||
int sid = getInteger(getTextContent("seriesid", node));
|
||||
String seriesName = getTextContent("SeriesName", node);
|
||||
|
||||
List<String> aliasNames = new ArrayList<String>();
|
||||
@ -142,9 +142,9 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
|
||||
seriesInfo.setOverview(getTextContent("Overview", seriesNode));
|
||||
seriesInfo.setStatus(getTextContent("Status", seriesNode));
|
||||
|
||||
seriesInfo.setRating(getDecimalContent("Rating", seriesNode));
|
||||
seriesInfo.setRatingCount(getIntegerContent("RatingCount", seriesNode));
|
||||
seriesInfo.setRuntime(getIntegerContent("Runtime", seriesNode));
|
||||
seriesInfo.setRating(getDecimal(getTextContent("Rating", seriesNode)));
|
||||
seriesInfo.setRatingCount(getInteger(getTextContent("RatingCount", seriesNode)));
|
||||
seriesInfo.setRuntime(getInteger(getTextContent("Runtime", seriesNode)));
|
||||
seriesInfo.setActors(getListContent("Actors", "\\|", seriesNode));
|
||||
seriesInfo.setGenres(getListContent("Genre", "\\|", seriesNode));
|
||||
seriesInfo.setStartDate(SimpleDate.parse(getTextContent("FirstAired", seriesNode), "yyyy-MM-dd"));
|
||||
@ -163,16 +163,16 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
|
||||
String episodeName = getTextContent("EpisodeName", node);
|
||||
String dvdSeasonNumber = getTextContent("DVD_season", node);
|
||||
String dvdEpisodeNumber = getTextContent("DVD_episodenumber", node);
|
||||
Integer absoluteNumber = getIntegerContent("absolute_number", node);
|
||||
Integer absoluteNumber = getInteger(getTextContent("absolute_number", node));
|
||||
SimpleDate airdate = SimpleDate.parse(getTextContent("FirstAired", node), "yyyy-MM-dd");
|
||||
|
||||
// default numbering
|
||||
Integer episodeNumber = getIntegerContent("EpisodeNumber", node);
|
||||
Integer seasonNumber = getIntegerContent("SeasonNumber", node);
|
||||
Integer episodeNumber = getInteger(getTextContent("EpisodeNumber", node));
|
||||
Integer seasonNumber = getInteger(getTextContent("SeasonNumber", node));
|
||||
|
||||
if (seasonNumber == null || seasonNumber == 0) {
|
||||
// handle as special episode
|
||||
Integer airsBefore = getIntegerContent("airsbefore_season", node);
|
||||
Integer airsBefore = getInteger(getTextContent("airsbefore_season", node));
|
||||
if (airsBefore != null) {
|
||||
seasonNumber = airsBefore;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user