* IMDb page scraper helper for people who really need it

2024-12-23 16:28:51 -05:00 · 2013-07-24 04:59:13 +00:00 · 2013-07-24 04:59:13 +00:00 · 6217589f82
commit 6217589f82
parent c8b6485d2a
2 changed files with 6 additions and 1 deletions
--- a/source/net/sourceforge/filebot/format/ExpressionFormat.lib.groovy
+++ b/source/net/sourceforge/filebot/format/ExpressionFormat.lib.groovy
@ -47,7 +47,7 @@ String.metaClass.pad = Number.metaClass.pad = { length = 2, padding = "0" -> del
 * Return a substring matching the given pattern or break.
 */
 String.metaClass.match = { String pattern, matchGroup = null -> 
-	def matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.MULTILINE).matcher(delegate)
+	def matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.MULTILINE | Pattern.DOTALL).matcher(delegate)
 	if (matcher.find())
 		return matcher.groupCount() > 0 && matchGroup == null ? matcher.group(1) : matcher.group(matchGroup ?: 0)
 	else
--- a/source/net/sourceforge/filebot/web/IMDbClient.java
+++ b/source/net/sourceforge/filebot/web/IMDbClient.java
@ -155,6 +155,11 @@ public class IMDbClient implements MovieIdentificationService {
 	}
 	
 	
+	public String scrape(String imdbid, String xpath) throws IOException, SAXException {
+		return selectString(xpath, parsePage(getMoviePageLink(getImdbId(imdbid)).toURL())); // helper for scraping data in user scripts: resolves imdbid via getImdbId, parses the page at the resulting movie link and returns the string selected by the given xpath
+	}
+	
+	
 	public URI getMoviePageLink(int imdbId) {
 		return URI.create(String.format("http://www.imdb.com/title/tt%07d/", imdbId)); // %07d zero-pads the numeric id to at least seven digits after the "tt" prefix
 	}