mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-11 22:08:01 -05:00
* better support for web-scraping in format expressions
This commit is contained in:
parent
859a0c045c
commit
d421a0f55f
@ -75,9 +75,6 @@ import java.nio.ByteBuffer
|
||||
import java.nio.charset.Charset
|
||||
import static net.sourceforge.filebot.web.WebRequest.*
|
||||
|
||||
/**
 * URL helpers: read the resource as text, parse it as HTML or XML, or fetch it.
 * readAll, getReader and fetch are resolved through the static WebRequest import.
 */
URL.metaClass.getText = {
	def connection = delegate.openConnection()
	return readAll(getReader(connection))
}

URL.metaClass.getHtml = {
	// lenient HTML parsing via the NekoHTML SAX parser
	def htmlParser = new XmlParser(new org.cyberneko.html.parsers.SAXParser())
	return htmlParser.parseText(delegate.getText())
}

URL.metaClass.getXml = {
	def xmlParser = new XmlParser()
	return xmlParser.parseText(delegate.getText())
}

URL.metaClass.fetch = {
	def url = delegate
	return fetch(url)
}
|
||||
/**
 * Decode the buffer content as text using the given charset name (default: utf-8).
 */
ByteBuffer.metaClass.getText = { csn = "utf-8" ->
	def charset = Charset.forName(csn)
	// decode a duplicate so the position and limit of the original buffer are not changed
	def chars = charset.decode(delegate.duplicate())
	return chars.toString()
}
|
||||
/**
 * Decode the buffer content using the given charset name (default: utf-8)
 * and parse the resulting text as HTML.
 */
ByteBuffer.metaClass.getHtml = { csn = "utf-8" ->
	def htmlParser = new XmlParser(new org.cyberneko.html.parsers.SAXParser())
	def markup = delegate.getText(csn)
	return htmlParser.parseText(markup)
}
|
||||
|
@ -170,3 +170,38 @@ String.metaClass.transliterate = { transformIdentifier -> com.ibm.icu.text.Trans
|
||||
* "カタカナ" -> "katakana"
|
||||
*/
|
||||
String.metaClass.ascii = { fallback = ' ' ->
	// transliterate to Latin, fold to ASCII and strip diacritics
	def latin = delegate.transliterate("Any-Latin;Latin-ASCII;[:Diacritic:]remove")
	// replace each remaining run of non-ASCII characters with the fallback
	return latin.replaceAll("[^\\p{ASCII}]+", fallback)
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Web and File IO helpers
|
||||
*/
|
||||
import net.sourceforge.filebot.web.WebRequest
|
||||
import net.sourceforge.tuned.FileUtilities
|
||||
import net.sourceforge.tuned.XPathUtilities
|
||||
|
||||
// read the resource behind this URL as text
URL.metaClass.getText = {
	def connection = delegate.openConnection()
	def reader = WebRequest.getReader(connection)
	return FileUtilities.readAll(reader)
}

// parse the resource as HTML using the NekoHTML SAX parser
URL.metaClass.getHtml = {
	def htmlParser = new XmlParser(new org.cyberneko.html.parsers.SAXParser())
	return htmlParser.parseText(delegate.getText())
}

// parse the resource as XML
URL.metaClass.getXml = {
	def xmlParser = new XmlParser()
	return xmlParser.parseText(delegate.getText())
}

// evaluate the XPath expression against the HTML document and return the selected string
URL.metaClass.scrape = { xpath ->
	def document = WebRequest.getHtmlDocument(delegate)
	return XPathUtilities.selectString(xpath, document)
}

// evaluate the XPath expression against the HTML document and return the text content of each selected node
URL.metaClass.scrapeAll = { xpath ->
	def document = WebRequest.getHtmlDocument(delegate)
	def selected = XPathUtilities.selectNodes(xpath, document)
	return selected.findResults{ node -> XPathUtilities.getTextContent(node) }
}
|
||||
|
||||
|
||||
/**
|
||||
* XML / XPath utility functions
|
||||
*/
|
||||
import javax.xml.xpath.XPathFactory
|
||||
import javax.xml.xpath.XPathConstants
|
||||
|
||||
// evaluate the XPath expression against the XML text of this File / URL and return the trimmed string result
File.metaClass.xpath = URL.metaClass.xpath = { String xpath ->
	def source = new org.xml.sax.InputSource(new StringReader(delegate.getText()))
	def evaluator = XPathFactory.newInstance().newXPath()
	def value = evaluator.evaluate(xpath, source, XPathConstants.STRING)
	return value.trim()
}
|
||||
|
||||
/**
 * Evaluate the XPath expression against the XML text of this File / URL and
 * return the trimmed text content of every selected node as a list.
 * Returns an empty list if no node matches; nodes without text content are left out.
 */
File.metaClass.xpathAll = URL.metaClass.xpathAll = { String xpath ->
	def input = new org.xml.sax.InputSource(new StringReader(delegate.getText()))
	def nodes = XPathFactory.newInstance().newXPath().evaluate(xpath, input, XPathConstants.NODESET)

	// exclusive range: iterates over each node index and is empty when nothing matched
	return (0..<nodes.getLength()).findResults{ i -> nodes.item(i).getTextContent()?.trim() }
}
|
||||
|
@ -27,6 +27,7 @@ public class SecureCompiledScript extends CompiledScript {
|
||||
Permissions permissions = new Permissions();
|
||||
|
||||
permissions.add(new RuntimePermission("createClassLoader"));
|
||||
permissions.add(new RuntimePermission("accessClassInPackage.*"));
|
||||
permissions.add(new RuntimePermission("modifyThread"));
|
||||
permissions.add(new FilePermission("<<ALL FILES>>", "read"));
|
||||
permissions.add(new SocketPermission("*", "connect"));
|
||||
|
Loading…
Reference in New Issue
Block a user