disabled namespaces in nekohtml parser

This commit is contained in:
Reinhard Pointner 2008-02-07 22:05:59 +00:00
parent 319a528542
commit 3c0296d11e
6 changed files with 38 additions and 122 deletions

View File

@@ -28,11 +28,11 @@ public class Torrent {
public Torrent(File torrent) throws IOException {
FileInputStream in = new FileInputStream(torrent);
BufferedInputStream in = new BufferedInputStream(new FileInputStream(torrent));
Map<?, ?> torrentMap = null;
try {
torrentMap = BDecoder.decode(new BufferedInputStream(in));
torrentMap = BDecoder.decode(in);
} finally {
in.close();
}

View File

@@ -16,6 +16,7 @@ import java.util.Map;
import java.util.TreeMap;
import net.sourceforge.filebot.resources.ResourceManager;
import net.sourceforge.tuned.XPathUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -42,19 +43,19 @@ public class AnidbSearchEngine extends SearchEngine {
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
List<Node> nodes = HtmlUtil.selectNodes("//TABLE[@class='anime_list']//TR//TD//ancestor::TR", dom);
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@class='anime_list']//TR//TD//ancestor::TR", dom);
ArrayList<String> shows = new ArrayList<String>(nodes.size());
if (!nodes.isEmpty())
for (Node node : nodes) {
String type = HtmlUtil.selectString("./TD[2]/text()", node);
String type = XPathUtil.selectString("./TD[2]/text()", node);
// we only want shows
if (type.equalsIgnoreCase("tv series")) {
Node titleNode = HtmlUtil.selectNode("./TD[1]/A", node);
Node titleNode = XPathUtil.selectNode("./TD[1]/A", node);
String title = HtmlUtil.selectString("text()", titleNode);
String href = HtmlUtil.selectString("@href", titleNode);
String title = XPathUtil.selectString("text()", titleNode);
String href = XPathUtil.selectString("@href", titleNode);
String file = "/perl-bin/" + href;
@@ -70,11 +71,11 @@ public class AnidbSearchEngine extends SearchEngine {
}
else {
// we might have been redirected to the episode list page directly
List<Node> results = HtmlUtil.selectNodes("//TABLE[@class='eplist']", dom);
List<Node> results = XPathUtil.selectNodes("//TABLE[@class='eplist']", dom);
if (!results.isEmpty()) {
// get show's name from the document
String header = HtmlUtil.selectString("//DIV[@id='layout-content']//H1[1]/text()", dom);
String header = XPathUtil.selectString("//DIV[@id='layout-content']//H1[1]/text()", dom);
String title = header.replaceFirst("Anime:\\s*", "");
cache.put(title, getSearchUrl(searchterm));
@@ -92,7 +93,7 @@ public class AnidbSearchEngine extends SearchEngine {
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
List<Node> nodes = HtmlUtil.selectNodes("//TABLE[@id='eplist']//TR/TD/SPAN/ancestor::TR", dom);
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@id='eplist']//TR/TD/SPAN/ancestor::TR", dom);
LinkedList<Episode> list = new LinkedList<Episode>();
@@ -101,8 +102,8 @@ public class AnidbSearchEngine extends SearchEngine {
f.setGroupingUsed(false);
for (Node node : nodes) {
String number = HtmlUtil.selectString("./TD[1]/A/text()", node);
String title = HtmlUtil.selectString("./TD[2]/SPAN/text()", node);
String number = XPathUtil.selectString("./TD[1]/A/text()", node);
String title = XPathUtil.selectString("./TD[2]/SPAN/text()", node);
if (title.startsWith("recap"))
title = title.replaceFirst("recap", "");

View File

@@ -9,21 +9,17 @@ import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import net.sourceforge.tuned.XPathUtil;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
class HtmlUtil {
public class HtmlUtil {
private static Charset getCharset(String contentType) {
if (contentType != null) {
@@ -58,34 +54,10 @@ class HtmlUtil {
public static Document getHtmlDocument(Reader reader) throws SAXException, IOException {
DOMParser parser = new DOMParser();
parser.setFeature("http://xml.org/sax/features/namespaces", false);
parser.parse(new InputSource(reader));
return parser.getDocument();
}
public static String selectString(String xpath, Node node) {
return XPathUtil.selectString(xpath, node, "html", getNameSpace(node)).trim();
}
public static List<Node> selectNodes(String xpath, Node node) {
return XPathUtil.selectNodes(xpath, node, "html", getNameSpace(node));
}
public static Node selectNode(String xpath, Node node) {
return XPathUtil.selectNode(xpath, node, "html", getNameSpace(node));
}
private static String getNameSpace(Node node) {
if (node instanceof Document) {
// select root element
return XPathUtil.selectNode("/*", node, null, null).getNamespaceURI();
}
return node.getNamespaceURI();
}
}

View File

@@ -17,6 +17,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.filebot.resources.ResourceManager;
import net.sourceforge.tuned.XPathUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -43,13 +44,13 @@ public class TVRageSearchEngine extends SearchEngine {
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
List<Node> nodes = HtmlUtil.selectNodes("//DIV[@id='search_begin']//TABLE[1]//TR/TD/A[1]", dom);
List<Node> nodes = XPathUtil.selectNodes("//DIV[@id='search_begin']//TABLE[1]/*/TR/TD/A[1]", dom);
ArrayList<String> shows = new ArrayList<String>(nodes.size());
for (Node node : nodes) {
String href = HtmlUtil.selectString("@href", node);
String title = HtmlUtil.selectString("text()", node);
String href = XPathUtil.selectString("@href", node);
String title = XPathUtil.selectString("text()", node);
try {
URL url = new URL(href);
@@ -69,18 +70,18 @@ public class TVRageSearchEngine extends SearchEngine {
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
List<Node> nodes = HtmlUtil.selectNodes("//TABLE[@class='b']//TR[@id='brow']", dom);
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@class='b']//TR[@id='brow']", dom);
ArrayList<Episode> episodes = new ArrayList<Episode>();
for (Node node : nodes) {
String seasonAndEpisodeNumber = HtmlUtil.selectString("./TD[2]/A/text()", node);
String title = HtmlUtil.selectString("./TD[4]/A/text()", node);
String seasonAndEpisodeNumber = XPathUtil.selectString("./TD[2]/A/text()", node);
String title = XPathUtil.selectString("./TD[4]/A/text()", node);
List<Node> precedings = HtmlUtil.selectNodes("../preceding-sibling::TABLE", node);
List<Node> precedings = XPathUtil.selectNodes("../preceding-sibling::TABLE", node);
Node previousTable = precedings.get(precedings.size() - 1);
String seasonHeader = HtmlUtil.selectString("./TR/TD/FONT/text()", previousTable);
String seasonHeader = XPathUtil.selectString("./TR/TD/FONT/text()", previousTable);
Matcher seasonMatcher = Pattern.compile("Season (\\d+)").matcher(seasonHeader);

View File

@@ -16,6 +16,7 @@ import java.util.Map;
import java.util.TreeMap;
import net.sourceforge.filebot.resources.ResourceManager;
import net.sourceforge.tuned.XPathUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -42,7 +43,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
List<Node> nodes = HtmlUtil.selectNodes("//html:TABLE[@id='search-results']//html:SPAN/html:A", dom);
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@id='search-results']//SPAN/A", dom);
ArrayList<String> shows = new ArrayList<String>(nodes.size());
@@ -52,7 +53,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
// we only want search results that are shows
if (category.toLowerCase().startsWith("show")) {
String title = node.getTextContent();
String href = HtmlUtil.selectString("@href", node);
String href = XPathUtil.selectString("@href", node);
try {
URL url = new URL(href);
@@ -74,7 +75,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListUrl(showname, season));
List<Node> nodes = HtmlUtil.selectNodes("//html:DIV[@id='episode-listing']/html:DIV/html:TABLE/html:TR/html:TD/ancestor::html:TR", dom);
List<Node> nodes = XPathUtil.selectNodes("//DIV[@id='episode-listing']/DIV/TABLE/TR/TD/ancestor::TR", dom);
String seasonString = null;
@@ -93,8 +94,8 @@ public class TvdotcomSearchEngine extends SearchEngine {
episodeOffset = 0;
for (Node node : nodes) {
String episodeNumber = HtmlUtil.selectString("./html:TD[1]/text()", node);
String title = HtmlUtil.selectString("./html:TD[2]/html:A/text()", node);
String episodeNumber = XPathUtil.selectString("./TD[1]/text()", node);
String title = XPathUtil.selectString("./TD[2]/A/text()", node);
try {
// format number of episode
@@ -105,7 +106,7 @@ public class TvdotcomSearchEngine extends SearchEngine {
episodeNumber = numberFormat.format(n - episodeOffset);
} catch (NumberFormatException e) {
// episode number can be "Pilot" or "Special"
// episode number may be "Pilot", "Special", etc.
}
episodes.add(new Episode(showname, seasonString, episodeNumber, title));

View File

@@ -3,11 +3,8 @@ package net.sourceforge.tuned;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
@@ -18,9 +15,9 @@ import org.w3c.dom.NodeList;
public class XPathUtil {
public static Node selectNode(String xpath, Object node, String namespacePrefix, String namespace) {
public static Node selectNode(String xpath, Object node) {
try {
XPath xp = createXPath(namespacePrefix, namespace);
XPath xp = XPathFactory.newInstance().newXPath();
return (Node) xp.evaluate(xpath, node, XPathConstants.NODE);
} catch (Exception e) {
@@ -29,9 +26,9 @@ public class XPathUtil {
}
public static List<Node> selectNodes(String xpath, Object node, String namespacePrefix, String namespace) {
public static List<Node> selectNodes(String xpath, Object node) {
try {
XPath xp = createXPath(namespacePrefix, namespace);
XPath xp = XPathFactory.newInstance().newXPath();
NodeList nodeList = (NodeList) xp.evaluate(xpath, node, XPathConstants.NODESET);
@@ -48,69 +45,13 @@ public class XPathUtil {
}
public static String selectString(String xpath, Object node, String namespacePrefix, String namespace) {
public static String selectString(String xpath, Object node) {
try {
XPath xp = createXPath(namespacePrefix, namespace);
return (String) xp.evaluate(xpath, node, XPathConstants.STRING);
XPath xp = XPathFactory.newInstance().newXPath();
return ((String) xp.evaluate(xpath, node, XPathConstants.STRING)).trim();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private static XPath createXPath(String namespacePrefix, String namespace) {
XPath xp = XPathFactory.newInstance().newXPath();
if (namespacePrefix != null && namespace != null) {
xp.setNamespaceContext(new NamespaceContextProvider(namespacePrefix, namespace));
}
return xp;
}
private static class NamespaceContextProvider implements NamespaceContext {
String boundPrefix;
String boundURI;
NamespaceContextProvider(String prefix, String URI) {
boundPrefix = prefix;
boundURI = URI;
}
public String getNamespaceURI(String prefix) {
if (prefix.equals(boundPrefix)) {
return boundURI;
} else if (prefix.equals(XMLConstants.XML_NS_PREFIX)) {
return XMLConstants.XML_NS_URI;
} else if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) {
return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
} else {
return XMLConstants.NULL_NS_URI;
}
}
public String getPrefix(String namespaceURI) {
if (namespaceURI.equals(boundURI)) {
return boundPrefix;
} else if (namespaceURI.equals(XMLConstants.XML_NS_URI)) {
return XMLConstants.XML_NS_PREFIX;
} else if (namespaceURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
return XMLConstants.XMLNS_ATTRIBUTE;
} else {
return null;
}
}
@SuppressWarnings("unchecked")
public Iterator getPrefixes(String namespaceURI) {
return null;
}
}
}