From d421a0f55fcbad9e5f9ce20401551c74c83c68d6 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Fri, 18 Oct 2013 11:11:15 +0000 Subject: [PATCH] * better support for web-scraping in format expressions --- .../filebot/cli/ScriptShell.lib.groovy | 3 -- .../format/ExpressionFormat.lib.groovy | 35 +++++++++++++++++++ .../filebot/format/SecureCompiledScript.java | 1 + 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy b/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy index e7fb103b..a889f8b7 100644 --- a/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy +++ b/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy @@ -75,9 +75,6 @@ import java.nio.ByteBuffer import java.nio.charset.Charset import static net.sourceforge.filebot.web.WebRequest.* -URL.metaClass.getText = { readAll(getReader(delegate.openConnection())) } -URL.metaClass.getHtml = { new XmlParser(new org.cyberneko.html.parsers.SAXParser()).parseText(delegate.getText()) } -URL.metaClass.getXml = { new XmlParser().parseText(delegate.getText()) } URL.metaClass.fetch = { fetch(delegate) } ByteBuffer.metaClass.getText = { csn = "utf-8" -> Charset.forName(csn).decode(delegate.duplicate()).toString() } ByteBuffer.metaClass.getHtml = { csn = "utf-8" -> new XmlParser(new org.cyberneko.html.parsers.SAXParser()).parseText(delegate.getText(csn)) } diff --git a/source/net/sourceforge/filebot/format/ExpressionFormat.lib.groovy b/source/net/sourceforge/filebot/format/ExpressionFormat.lib.groovy index 04ec1c2c..11310866 100644 --- a/source/net/sourceforge/filebot/format/ExpressionFormat.lib.groovy +++ b/source/net/sourceforge/filebot/format/ExpressionFormat.lib.groovy @@ -170,3 +170,38 @@ String.metaClass.transliterate = { transformIdentifier -> com.ibm.icu.text.Trans * "カタカナ" -> "katakana" */ String.metaClass.ascii = { fallback = ' ' -> 
delegate.transliterate("Any-Latin;Latin-ASCII;[:Diacritic:]remove").replaceAll("[^\\p{ASCII}]+", fallback) } + + + + +/** +* Web and File IO helpers: URL.text reads the whole response of the URL connection, URL.html and URL.xml parse that text into a node tree, URL.scrape(xpath) and URL.scrapeAll(xpath) run an XPath query against WebRequest.getHtmlDocument(url) and yield the selected string or the text content of the selected nodes +*/ +import net.sourceforge.filebot.web.WebRequest +import net.sourceforge.tuned.FileUtilities +import net.sourceforge.tuned.XPathUtilities + +URL.metaClass.getText = { FileUtilities.readAll(WebRequest.getReader(delegate.openConnection())) } +URL.metaClass.getHtml = { new XmlParser(new org.cyberneko.html.parsers.SAXParser()).parseText(delegate.getText()) } +URL.metaClass.getXml = { new XmlParser().parseText(delegate.getText()) } +URL.metaClass.scrape = { xpath -> XPathUtilities.selectString(xpath, WebRequest.getHtmlDocument(delegate)) } +URL.metaClass.scrapeAll = { xpath -> XPathUtilities.selectNodes(xpath, WebRequest.getHtmlDocument(delegate)).findResults{ XPathUtilities.getTextContent(it) } } + + +/** +* XML / XPath utility functions for File and URL: xpath(expr) evaluates the expression against the text of the delegate and returns the trimmed string result, xpathAll(expr) returns a list with the trimmed text content of every node in the resulting node set (an empty list if the node set is empty) +*/ +import javax.xml.xpath.XPathFactory +import javax.xml.xpath.XPathConstants + +File.metaClass.xpath = URL.metaClass.xpath = { String xpath -> + def input = new org.xml.sax.InputSource(new StringReader(delegate.getText())) + def result = XPathFactory.newInstance().newXPath().evaluate(xpath, input, XPathConstants.STRING) + return result.trim(); +} + +File.metaClass.xpathAll = URL.metaClass.xpathAll = { String xpath -> + def input = new org.xml.sax.InputSource(new StringReader(delegate.getText())) + def nodes = XPathFactory.newInstance().newXPath().evaluate(xpath, input, XPathConstants.NODESET) + return (0..<nodes.length).findResults{ i -> nodes.item(i).getTextContent().trim() } /* exclusive index range: visits each node once and is empty for an empty node set */ +} diff --git a/source/net/sourceforge/filebot/format/SecureCompiledScript.java b/source/net/sourceforge/filebot/format/SecureCompiledScript.java index d56c1a3b..2a73dcbf 100644 --- a/source/net/sourceforge/filebot/format/SecureCompiledScript.java +++ b/source/net/sourceforge/filebot/format/SecureCompiledScript.java @@ -27,6 +27,7 @@ public class SecureCompiledScript extends CompiledScript { Permissions 
permissions = new Permissions(); permissions.add(new RuntimePermission("createClassLoader")); + permissions.add(new RuntimePermission("accessClassInPackage.*")); permissions.add(new RuntimePermission("modifyThread")); permissions.add(new FilePermission("<>", "read")); permissions.add(new SocketPermission("*", "connect"));