diff --git a/source/net/sourceforge/filebot/torrent/Torrent.java b/source/net/sourceforge/filebot/torrent/Torrent.java
index 3425ea64..7e35d51c 100644
--- a/source/net/sourceforge/filebot/torrent/Torrent.java
+++ b/source/net/sourceforge/filebot/torrent/Torrent.java
@@ -28,11 +28,11 @@ public class Torrent {
 	
 	public Torrent(File torrent) throws IOException {
-		FileInputStream in = new FileInputStream(torrent);
+		BufferedInputStream in = new BufferedInputStream(new FileInputStream(torrent));
 		
 		Map torrentMap = null;
 		
 		try {
-			torrentMap = BDecoder.decode(new BufferedInputStream(in));
+			torrentMap = BDecoder.decode(in);
 		} finally {
 			in.close();
 		}
diff --git a/source/net/sourceforge/filebot/web/AnidbSearchEngine.java b/source/net/sourceforge/filebot/web/AnidbSearchEngine.java
index c049df17..e341b53d 100644
--- a/source/net/sourceforge/filebot/web/AnidbSearchEngine.java
+++ b/source/net/sourceforge/filebot/web/AnidbSearchEngine.java
@@ -16,6 +16,7 @@ import java.util.Map;
 import java.util.TreeMap;
 
 import net.sourceforge.filebot.resources.ResourceManager;
+import net.sourceforge.tuned.XPathUtil;
 
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
@@ -42,19 +43,19 @@ public class AnidbSearchEngine extends SearchEngine {
 		
 		Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
 		
-		List nodes = HtmlUtil.selectNodes("//TABLE[@class='anime_list']//TR//TD//ancestor::TR", dom);
+		List nodes = XPathUtil.selectNodes("//TABLE[@class='anime_list']//TR//TD//ancestor::TR", dom);
 		
 		ArrayList shows = new ArrayList(nodes.size());
 		
 		if (!nodes.isEmpty())
 			for (Node node : nodes) {
-				String type = HtmlUtil.selectString("./TD[2]/text()", node);
+				String type = XPathUtil.selectString("./TD[2]/text()", node);
 				
 				// we only want shows
 				if (type.equalsIgnoreCase("tv series")) {
-					Node titleNode = HtmlUtil.selectNode("./TD[1]/A", node);
+					Node titleNode = XPathUtil.selectNode("./TD[1]/A", node);
 					
-					String title = HtmlUtil.selectString("text()", titleNode);
-					String href = HtmlUtil.selectString("@href", titleNode);
+					String title = XPathUtil.selectString("text()", titleNode);
+					String href = XPathUtil.selectString("@href", titleNode);
 					
 					String file = "/perl-bin/" + href;
@@ -70,11 +71,11 @@ public class AnidbSearchEngine extends SearchEngine {
 			
 		} else {
 			// we might have been redirected to the episode list page directly
-			List results = HtmlUtil.selectNodes("//TABLE[@class='eplist']", dom);
+			List results = XPathUtil.selectNodes("//TABLE[@class='eplist']", dom);
 			
 			if (!results.isEmpty()) {
 				// get show's name from the document
-				String header = HtmlUtil.selectString("//DIV[@id='layout-content']//H1[1]/text()", dom);
+				String header = XPathUtil.selectString("//DIV[@id='layout-content']//H1[1]/text()", dom);
 				String title = header.replaceFirst("Anime:\\s*", "");
 				
 				cache.put(title, getSearchUrl(searchterm));
@@ -92,7 +93,7 @@ public class AnidbSearchEngine extends SearchEngine {
 		
 		Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
 		
-		List nodes = HtmlUtil.selectNodes("//TABLE[@id='eplist']//TR/TD/SPAN/ancestor::TR", dom);
+		List nodes = XPathUtil.selectNodes("//TABLE[@id='eplist']//TR/TD/SPAN/ancestor::TR", dom);
 		
 		LinkedList list = new LinkedList();
 		
@@ -101,8 +102,8 @@ public class AnidbSearchEngine extends SearchEngine {
 		f.setGroupingUsed(false);
 		
 		for (Node node : nodes) {
-			String number = HtmlUtil.selectString("./TD[1]/A/text()", node);
-			String title = HtmlUtil.selectString("./TD[2]/SPAN/text()", node);
+			String number = XPathUtil.selectString("./TD[1]/A/text()", node);
+			String title = XPathUtil.selectString("./TD[2]/SPAN/text()", node);
 			
 			if (title.startsWith("recap"))
 				title = title.replaceFirst("recap", "");
diff --git a/source/net/sourceforge/filebot/web/HtmlUtil.java b/source/net/sourceforge/filebot/web/HtmlUtil.java
index ac42f802..afebdd0a 100644
--- a/source/net/sourceforge/filebot/web/HtmlUtil.java
+++ b/source/net/sourceforge/filebot/web/HtmlUtil.java
@@ -9,21 +9,17 @@ import java.io.Reader;
 import java.net.URL;
 import java.net.URLConnection;
 import java.nio.charset.Charset;
-import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.zip.GZIPInputStream;
 
-import net.sourceforge.tuned.XPathUtil;
-
 import org.cyberneko.html.parsers.DOMParser;
 import org.w3c.dom.Document;
-import org.w3c.dom.Node;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 
 
-class HtmlUtil {
+public class HtmlUtil {
 	
 	private static Charset getCharset(String contentType) {
 		if (contentType != null) {
@@ -58,34 +54,10 @@ class HtmlUtil {
 	
 	
 	public static Document getHtmlDocument(Reader reader) throws SAXException, IOException {
 		DOMParser parser = new DOMParser();
+		parser.setFeature("http://xml.org/sax/features/namespaces", false);
 		parser.parse(new InputSource(reader));
 		return parser.getDocument();
 	}
 	
-	
-	public static String selectString(String xpath, Node node) {
-		return XPathUtil.selectString(xpath, node, "html", getNameSpace(node)).trim();
-	}
-	
-	
-	public static List selectNodes(String xpath, Node node) {
-		return XPathUtil.selectNodes(xpath, node, "html", getNameSpace(node));
-	}
-	
-	
-	public static Node selectNode(String xpath, Node node) {
-		return XPathUtil.selectNode(xpath, node, "html", getNameSpace(node));
-	}
-	
-	
-	private static String getNameSpace(Node node) {
-		if (node instanceof Document) {
-			// select root element
-			return XPathUtil.selectNode("/*", node, null, null).getNamespaceURI();
-		}
-		
-		return node.getNamespaceURI();
-	}
-	
 }
diff --git a/source/net/sourceforge/filebot/web/TVRageSearchEngine.java b/source/net/sourceforge/filebot/web/TVRageSearchEngine.java
index e244e958..08ef5372 100644
--- a/source/net/sourceforge/filebot/web/TVRageSearchEngine.java
+++ b/source/net/sourceforge/filebot/web/TVRageSearchEngine.java
@@ -17,6 +17,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import net.sourceforge.filebot.resources.ResourceManager;
+import net.sourceforge.tuned.XPathUtil;
 
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
@@ -43,13 +44,13 @@ public class TVRageSearchEngine extends SearchEngine {
 		
 		Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
 		
-		List nodes = HtmlUtil.selectNodes("//DIV[@id='search_begin']//TABLE[1]//TR/TD/A[1]", dom);
+		List nodes = XPathUtil.selectNodes("//DIV[@id='search_begin']//TABLE[1]/*/TR/TD/A[1]", dom);
 		
 		ArrayList shows = new ArrayList(nodes.size());
 		
 		for (Node node : nodes) {
-			String href = HtmlUtil.selectString("@href", node);
-			String title = HtmlUtil.selectString("text()", node);
+			String href = XPathUtil.selectString("@href", node);
+			String title = XPathUtil.selectString("text()", node);
 			
 			try {
 				URL url = new URL(href);
@@ -69,18 +70,18 @@ public class TVRageSearchEngine extends SearchEngine {
 		
 		Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
 		
-		List nodes = HtmlUtil.selectNodes("//TABLE[@class='b']//TR[@id='brow']", dom);
+		List nodes = XPathUtil.selectNodes("//TABLE[@class='b']//TR[@id='brow']", dom);
 		
 		ArrayList episodes = new ArrayList();
 		
 		for (Node node : nodes) {
-			String seasonAndEpisodeNumber = HtmlUtil.selectString("./TD[2]/A/text()", node);
-			String title = HtmlUtil.selectString("./TD[4]/A/text()", node);
+			String seasonAndEpisodeNumber = XPathUtil.selectString("./TD[2]/A/text()", node);
+			String title = XPathUtil.selectString("./TD[4]/A/text()", node);
 			
-			List precedings = HtmlUtil.selectNodes("../preceding-sibling::TABLE", node);
+			List precedings = XPathUtil.selectNodes("../preceding-sibling::TABLE", node);
 			Node previousTable = precedings.get(precedings.size() - 1);
 			
-			String seasonHeader = HtmlUtil.selectString("./TR/TD/FONT/text()", previousTable);
+			String seasonHeader = XPathUtil.selectString("./TR/TD/FONT/text()", previousTable);
 			
 			Matcher seasonMatcher = Pattern.compile("Season (\\d+)").matcher(seasonHeader);
diff --git a/source/net/sourceforge/filebot/web/TvdotcomSearchEngine.java b/source/net/sourceforge/filebot/web/TvdotcomSearchEngine.java
index 1248ec5a..24425296 100644
--- a/source/net/sourceforge/filebot/web/TvdotcomSearchEngine.java
+++ b/source/net/sourceforge/filebot/web/TvdotcomSearchEngine.java
@@ -16,6 +16,7 @@ import java.util.Map;
 import java.util.TreeMap;
 
 import net.sourceforge.filebot.resources.ResourceManager;
+import net.sourceforge.tuned.XPathUtil;
 
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
@@ -42,7 +43,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
 		
 		Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
 		
-		List nodes = HtmlUtil.selectNodes("//html:TABLE[@id='search-results']//html:SPAN/html:A", dom);
+		List nodes = XPathUtil.selectNodes("//TABLE[@id='search-results']//SPAN/A", dom);
 		
 		ArrayList shows = new ArrayList(nodes.size());
 		
@@ -52,7 +53,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
 			// we only want search results that are shows
 			if (category.toLowerCase().startsWith("show")) {
 				String title = node.getTextContent();
-				String href = HtmlUtil.selectString("@href", node);
+				String href = XPathUtil.selectString("@href", node);
 				
 				try {
 					URL url = new URL(href);
@@ -74,7 +75,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
 		
 		Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
 		
-		List nodes = HtmlUtil.selectNodes("//html:DIV[@id='episode-listing']/html:DIV/html:TABLE/html:TR/html:TD/ancestor::html:TR", dom);
+		List nodes = XPathUtil.selectNodes("//DIV[@id='episode-listing']/DIV/TABLE/TR/TD/ancestor::TR", dom);
 		
 		String seasonString = null;
 		
@@ -93,8 +94,8 @@ public class TvdotcomSearchEngine extends SearchEngine {
 			episodeOffset = 0;
 		
 		for (Node node : nodes) {
-			String episodeNumber = HtmlUtil.selectString("./html:TD[1]/text()", node);
-			String title = HtmlUtil.selectString("./html:TD[2]/html:A/text()", node);
+			String episodeNumber = XPathUtil.selectString("./TD[1]/text()", node);
+			String title = XPathUtil.selectString("./TD[2]/A/text()", node);
 			
 			try {
 				// format number of episode
@@ -105,7 +106,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
 				
 				episodeNumber = numberFormat.format(n - episodeOffset);
 			} catch (NumberFormatException e) {
-				// episode number can be "Pilot" or "Special"
+				// episode number may be "Pilot", "Special", etc.
 			}
 			
 			episodes.add(new Episode(showname, seasonString, episodeNumber, title));
diff --git a/source/net/sourceforge/tuned/XPathUtil.java b/source/net/sourceforge/tuned/XPathUtil.java
index 7ea1890f..3fb672cd 100644
--- a/source/net/sourceforge/tuned/XPathUtil.java
+++ b/source/net/sourceforge/tuned/XPathUtil.java
@@ -3,11 +3,8 @@ package net.sourceforge.tuned;
 
 
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
 
-import javax.xml.XMLConstants;
-import javax.xml.namespace.NamespaceContext;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathFactory;
@@ -18,9 +15,9 @@ import org.w3c.dom.NodeList;
 
 public class XPathUtil {
 	
-	public static Node selectNode(String xpath, Object node, String namespacePrefix, String namespace) {
+	public static Node selectNode(String xpath, Object node) {
 		try {
-			XPath xp = createXPath(namespacePrefix, namespace);
+			XPath xp = XPathFactory.newInstance().newXPath();
 			
 			return (Node) xp.evaluate(xpath, node, XPathConstants.NODE);
 		} catch (Exception e) {
@@ -29,9 +26,9 @@ public class XPathUtil {
 	}
 	
 	
-	public static List selectNodes(String xpath, Object node, String namespacePrefix, String namespace) {
+	public static List selectNodes(String xpath, Object node) {
 		try {
-			XPath xp = createXPath(namespacePrefix, namespace);
+			XPath xp = XPathFactory.newInstance().newXPath();
 			
 			NodeList nodeList = (NodeList) xp.evaluate(xpath, node, XPathConstants.NODESET);
 			
@@ -48,69 +45,13 @@ public class XPathUtil {
 	}
 	
 	
-	public static String selectString(String xpath, Object node, String namespacePrefix, String namespace) {
+	public static String selectString(String xpath, Object node) {
 		try {
-			XPath xp = createXPath(namespacePrefix, namespace);
-			return (String) xp.evaluate(xpath, node, XPathConstants.STRING);
+			XPath xp = XPathFactory.newInstance().newXPath();
+			return ((String) xp.evaluate(xpath, node, XPathConstants.STRING)).trim();
 		} catch (Exception e) {
 			throw new RuntimeException(e);
 		}
 	}
-	
-	
-	private static XPath createXPath(String namespacePrefix, String namespace) {
-		XPath xp = XPathFactory.newInstance().newXPath();
-		
-		if (namespacePrefix != null && namespace != null) {
-			xp.setNamespaceContext(new NamespaceContextProvider(namespacePrefix, namespace));
-		}
-		
-		return xp;
-	}
-	
-	
-	private static class NamespaceContextProvider implements NamespaceContext {
-		
-		String boundPrefix;
-		String boundURI;
-		
-		
-		NamespaceContextProvider(String prefix, String URI) {
-			boundPrefix = prefix;
-			boundURI = URI;
-		}
-		
-		
-		public String getNamespaceURI(String prefix) {
-			if (prefix.equals(boundPrefix)) {
-				return boundURI;
-			} else if (prefix.equals(XMLConstants.XML_NS_PREFIX)) {
-				return XMLConstants.XML_NS_URI;
-			} else if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) {
-				return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
-			} else {
-				return XMLConstants.NULL_NS_URI;
-			}
-		}
-		
-		
-		public String getPrefix(String namespaceURI) {
-			if (namespaceURI.equals(boundURI)) {
-				return boundPrefix;
-			} else if (namespaceURI.equals(XMLConstants.XML_NS_URI)) {
-				return XMLConstants.XML_NS_PREFIX;
-			} else if (namespaceURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
-				return XMLConstants.XMLNS_ATTRIBUTE;
-			} else {
-				return null;
-			}
-		}
-		
-		
-		@SuppressWarnings("unchecked")
-		public Iterator getPrefixes(String namespaceURI) {
-			return null;
-		}
-	}
-	
 }
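Usage note (not part of the patch): a minimal sketch of how the simplified helpers are intended to be used after this change. The demo class name and the sample HTML below are made up for illustration; only HtmlUtil.getHtmlDocument(Reader) and the two-argument XPathUtil methods shown in the diff are assumed. Because the NekoHTML DOMParser is now created with the namespaces feature disabled, plain upper-case XPath steps such as //TABLE or ./TD[1]/A match directly, without the former "html:" prefix or namespace arguments.

    import java.io.StringReader;
    import java.util.List;

    import net.sourceforge.filebot.web.HtmlUtil;
    import net.sourceforge.tuned.XPathUtil;

    import org.w3c.dom.Document;
    import org.w3c.dom.Node;

    // Hypothetical demo class, not part of the FileBot sources.
    public class XPathUtilDemo {

        public static void main(String[] args) throws Exception {
            // NekoHTML upper-cases element names; with namespaces disabled the
            // plain XPath steps below match them directly.
            String html = "<html><body><table id='eplist'>"
                    + "<tr><td><a href='ep/1'>1</a></td><td><span>Pilot</span></td></tr>"
                    + "</table></body></html>";

            Document dom = HtmlUtil.getHtmlDocument(new StringReader(html));

            // simplified two-argument helpers, as introduced by this patch
            List rows = XPathUtil.selectNodes("//TABLE[@id='eplist']//TR", dom);

            for (Object row : rows) {
                String number = XPathUtil.selectString("./TD[1]/A/text()", (Node) row);
                String title = XPathUtil.selectString("./TD[2]/SPAN/text()", (Node) row);
                System.out.println(number + " - " + title);
            }
        }
    }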