* better support for web-scraping in format expressions

This commit is contained in:
Reinhard Pointner 2013-10-18 11:11:15 +00:00
parent 859a0c045c
commit d421a0f55f
3 changed files with 36 additions and 3 deletions

View File

@ -75,9 +75,6 @@ import java.nio.ByteBuffer
import java.nio.charset.Charset
import static net.sourceforge.filebot.web.WebRequest.*
// Read everything behind this URL as text.
// NOTE(review): readAll / getReader are not defined in this block; presumably provided by a static import elsewhere in the file.
URL.metaClass.getText = {
	def connection = delegate.openConnection()
	readAll(getReader(connection))
}

// Download this URL as text and parse it with the lenient NekoHTML SAX parser.
URL.metaClass.getHtml = {
	def htmlParser = new XmlParser(new org.cyberneko.html.parsers.SAXParser())
	htmlParser.parseText(delegate.getText())
}

// Download this URL as text and parse it as XML.
URL.metaClass.getXml = {
	def xmlParser = new XmlParser()
	xmlParser.parseText(delegate.getText())
}

// Hand this URL to the externally defined fetch(...) helper and return whatever it returns.
URL.metaClass.fetch = {
	fetch(delegate)
}

// Decode this buffer into a String using the named charset (default "utf-8").
// A duplicate of the buffer is decoded, so the position of the original buffer is left untouched.
ByteBuffer.metaClass.getText = { csn = "utf-8" ->
	def charset = Charset.forName(csn)
	charset.decode(delegate.duplicate()).toString()
}

// Decode this buffer with the named charset (default "utf-8") and parse the text with the NekoHTML SAX parser.
ByteBuffer.metaClass.getHtml = { csn = "utf-8" ->
	def htmlParser = new XmlParser(new org.cyberneko.html.parsers.SAXParser())
	htmlParser.parseText(delegate.getText(csn))
}

View File

@ -170,3 +170,38 @@ String.metaClass.transliterate = { transformIdentifier -> com.ibm.icu.text.Trans
* "カタカナ" -> "katakana"
*/
String.metaClass.ascii = { fallback = ' ' ->
	// run the transliteration chain first, then swap every remaining run of non-ASCII characters for the fallback
	def latinized = delegate.transliterate("Any-Latin;Latin-ASCII;[:Diacritic:]remove")
	latinized.replaceAll("[^\\p{ASCII}]+", fallback)
}
/**
* Web and File IO helpers
*/
import net.sourceforge.filebot.web.WebRequest
import net.sourceforge.tuned.FileUtilities
import net.sourceforge.tuned.XPathUtilities
// Open a connection to this URL and read its entire content as text.
URL.metaClass.getText = {
	def reader = WebRequest.getReader(delegate.openConnection())
	FileUtilities.readAll(reader)
}

// Download this URL as text and parse it with the lenient NekoHTML SAX parser.
URL.metaClass.getHtml = {
	def htmlParser = new XmlParser(new org.cyberneko.html.parsers.SAXParser())
	htmlParser.parseText(delegate.getText())
}

// Download this URL as text and parse it as XML.
URL.metaClass.getXml = {
	def xmlParser = new XmlParser()
	xmlParser.parseText(delegate.getText())
}

// Load this URL as an HTML document and return the string value selected by the given XPath expression.
URL.metaClass.scrape = { xpath ->
	def document = WebRequest.getHtmlDocument(delegate)
	XPathUtilities.selectString(xpath, document)
}

// Load this URL as an HTML document and return the text content of every node selected by the given XPath expression.
// findResults drops entries for which the text content is null.
URL.metaClass.scrapeAll = { xpath ->
	def document = WebRequest.getHtmlDocument(delegate)
	def matches = XPathUtilities.selectNodes(xpath, document)
	matches.findResults{ node -> XPathUtilities.getTextContent(node) }
}
/**
* XML / XPath utility functions
*/
import javax.xml.xpath.XPathFactory
import javax.xml.xpath.XPathConstants
// Evaluate an XPath expression against the text content of this File / URL
// and return the trimmed string value of the result.
File.metaClass.xpath = URL.metaClass.xpath = { String xpath ->
	def source = new org.xml.sax.InputSource(new StringReader(delegate.getText()))
	def evaluator = XPathFactory.newInstance().newXPath()
	evaluator.evaluate(xpath, source, XPathConstants.STRING).trim()
}
/**
 * Evaluate an XPath expression against the text content of this File / URL
 * and collect the trimmed text content of every matching node.
 *
 * @param xpath XPath expression selecting a node set
 * @return list of trimmed text values in document order; empty list if nothing matches
 */
File.metaClass.xpathAll = URL.metaClass.xpathAll = { String xpath ->
	def input = new org.xml.sax.InputSource(new StringReader(delegate.getText()))
	def nodes = XPathFactory.newInstance().newXPath().evaluate(xpath, input, XPathConstants.NODESET)
	// half-open range: visits each index exactly once and is empty (not the reverse range 0..-1) when there are no matches
	// getTextContent() may be null for some node types; findResults skips those entries
	return (0..<nodes.length).findResults{ i -> nodes.item(i).getTextContent()?.trim() }
}

View File

@ -27,6 +27,7 @@ public class SecureCompiledScript extends CompiledScript {
Permissions permissions = new Permissions();
permissions.add(new RuntimePermission("createClassLoader"));
permissions.add(new RuntimePermission("accessClassInPackage.*"));
permissions.add(new RuntimePermission("modifyThread"));
permissions.add(new FilePermission("<<ALL FILES>>", "read"));
permissions.add(new SocketPermission("*", "connect"));