1
0
mirror of https://github.com/mitb-archive/filebot synced 2025-01-10 21:38:04 -05:00

* separate long-term caches that have different update frequencies

This commit is contained in:
Reinhard Pointner 2013-11-29 04:29:56 +00:00
parent e7668f2c5c
commit 198b8b0e06
5 changed files with 90 additions and 118 deletions

View File

@ -46,12 +46,26 @@
memoryStoreEvictionPolicy="LRU" memoryStoreEvictionPolicy="LRU"
/> />
<!--
Long-lived (2 months) persistent disk cache for web responses (that can be updated via If-Modified-Since or If-None-Match)
-->
<cache name="web-datasource-lv3"
maxElementsInMemory="200"
maxElementsOnDisk="95000"
eternal="false"
timeToIdleSeconds="5256000"
timeToLiveSeconds="5256000"
overflowToDisk="true"
diskPersistent="true"
memoryStoreEvictionPolicy="LRU"
/>
<!-- <!--
Very long-lived cache (4 months) for anime/series lists, movie index, etc. Very long-lived cache (4 months) for anime/series lists, movie index, etc.
--> -->
<cache name="web-persistent-datasource" <cache name="web-persistent-datasource"
maxElementsInMemory="200" maxElementsInMemory="50"
maxElementsOnDisk="95000" maxElementsOnDisk="5000"
eternal="false" eternal="false"
timeToIdleSeconds="10512000" timeToIdleSeconds="10512000"
timeToLiveSeconds="10512000" timeToLiveSeconds="10512000"

View File

@ -23,7 +23,7 @@ public class CachedXmlResource extends AbstractCachedResource<String, String> {
@Override @Override
protected Cache getCache() { protected Cache getCache() {
return CacheManager.getInstance().getCache("web-persistent-datasource"); return CacheManager.getInstance().getCache("web-datasource-lv3");
} }
public Document getDocument() throws IOException { public Document getDocument() throws IOException {

View File

@ -45,7 +45,7 @@ public abstract class ETagCachedResource<T extends Serializable> extends CachedR
@Override @Override
protected Cache getCache() { protected Cache getCache() {
return CacheManager.getInstance().getCache("web-persistent-datasource"); return CacheManager.getInstance().getCache("web-datasource-lv3");
} }
} }

View File

@ -1,7 +1,5 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.*; import static net.sourceforge.filebot.web.WebRequest.*;
import static net.sourceforge.tuned.XPathUtilities.*; import static net.sourceforge.tuned.XPathUtilities.*;
@ -25,47 +23,40 @@ import net.sourceforge.filebot.web.FanartTV.FanartDescriptor.FanartProperty;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Node; import org.w3c.dom.Node;
public class FanartTV { public class FanartTV {
private String apikey; private String apikey;
public FanartTV(String apikey) { public FanartTV(String apikey) {
this.apikey = apikey; this.apikey = apikey;
} }
public List<FanartDescriptor> getSeriesArtwork(int tvdbid) throws Exception { public List<FanartDescriptor> getSeriesArtwork(int tvdbid) throws Exception {
return getSeriesArtwork(String.valueOf(tvdbid), "all", 1, 2); return getSeriesArtwork(String.valueOf(tvdbid), "all", 1, 2);
} }
public List<FanartDescriptor> getSeriesArtwork(String id, String type, int sort, int limit) throws Exception { public List<FanartDescriptor> getSeriesArtwork(String id, String type, int sort, int limit) throws Exception {
return getArtwork("series", id, type, sort, limit); return getArtwork("series", id, type, sort, limit);
} }
public List<FanartDescriptor> getMovieArtwork(int tmdbid) throws Exception { public List<FanartDescriptor> getMovieArtwork(int tmdbid) throws Exception {
return getMovieArtwork(String.valueOf(tmdbid), "all", 1, 2); return getMovieArtwork(String.valueOf(tmdbid), "all", 1, 2);
} }
public List<FanartDescriptor> getMovieArtwork(String id, String type, int sort, int limit) throws Exception { public List<FanartDescriptor> getMovieArtwork(String id, String type, int sort, int limit) throws Exception {
return getArtwork("movie", id, type, sort, limit); return getArtwork("movie", id, type, sort, limit);
} }
public List<FanartDescriptor> getArtwork(String category, String id, String type, int sort, int limit) throws Exception { public List<FanartDescriptor> getArtwork(String category, String id, String type, int sort, int limit) throws Exception {
String resource = getResource(category, id, "xml", type, sort, limit); String resource = getResource(category, id, "xml", type, sort, limit);
// cache results // cache results
CachedResource<FanartDescriptor[]> data = new CachedResource<FanartDescriptor[]>(resource, FanartDescriptor[].class) { CachedResource<FanartDescriptor[]> data = new CachedResource<FanartDescriptor[]>(resource, FanartDescriptor[].class) {
@Override @Override
public FanartDescriptor[] process(ByteBuffer data) throws Exception { public FanartDescriptor[] process(ByteBuffer data) throws Exception {
Document dom = getDocument(Charset.forName("UTF-8").decode(data).toString()); Document dom = getDocument(Charset.forName("UTF-8").decode(data).toString());
List<FanartDescriptor> fanart = new ArrayList<FanartDescriptor>(); List<FanartDescriptor> fanart = new ArrayList<FanartDescriptor>();
for (Node node : selectNodes("//*[@url]", dom)) { for (Node node : selectNodes("//*[@url]", dom)) {
// e.g. <seasonthumb id="3481" url="http://fanart.tv/fanart/tv/70327/seasonthumb/3481/Buffy (6).jpg" lang="en" likes="0" season="6"/> // e.g. <seasonthumb id="3481" url="http://fanart.tv/fanart/tv/70327/seasonthumb/3481/Buffy (6).jpg" lang="en" likes="0" season="6"/>
@ -79,61 +70,52 @@ public class FanartTV {
} }
fanart.add(new FanartDescriptor(fields)); fanart.add(new FanartDescriptor(fields));
} }
return fanart.toArray(new FanartDescriptor[0]); return fanart.toArray(new FanartDescriptor[0]);
} }
@Override @Override
protected Cache getCache() { protected Cache getCache() {
return CacheManager.getInstance().getCache("web-datasource"); return CacheManager.getInstance().getCache("web-datasource-lv2");
} }
}; };
return Arrays.asList(data.get()); return Arrays.asList(data.get());
} }
public String getResource(String category, String id, String format, String type, int sort, int limit) throws MalformedURLException { public String getResource(String category, String id, String format, String type, int sort, int limit) throws MalformedURLException {
// e.g. http://fanart.tv/webservice/series/780b986b22c35e6f7a134a2f392c2deb/70327/xml/all/1/2 // e.g. http://fanart.tv/webservice/series/780b986b22c35e6f7a134a2f392c2deb/70327/xml/all/1/2
return String.format("http://api.fanart.tv/webservice/%s/%s/%s/%s/%s/%s/%s", category, apikey, id, format, type, sort, limit); return String.format("http://api.fanart.tv/webservice/%s/%s/%s/%s/%s/%s/%s", category, apikey, id, format, type, sort, limit);
} }
public static class FanartDescriptor implements Serializable { public static class FanartDescriptor implements Serializable {
public static enum FanartProperty { public static enum FanartProperty {
type, id, url, lang, likes, season, disc_type type, id, url, lang, likes, season, disc_type
} }
protected Map<FanartProperty, String> fields; protected Map<FanartProperty, String> fields;
protected FanartDescriptor() { protected FanartDescriptor() {
// used by serializer // used by serializer
} }
protected FanartDescriptor(Map<FanartProperty, String> fields) { protected FanartDescriptor(Map<FanartProperty, String> fields) {
this.fields = new EnumMap<FanartProperty, String>(fields); this.fields = new EnumMap<FanartProperty, String>(fields);
} }
public String get(Object key) { public String get(Object key) {
return fields.get(FanartProperty.valueOf(key.toString())); return fields.get(FanartProperty.valueOf(key.toString()));
} }
public String get(FanartProperty key) { public String get(FanartProperty key) {
return fields.get(key); return fields.get(key);
} }
public String getType() { public String getType() {
return fields.get(FanartProperty.type); return fields.get(FanartProperty.type);
} }
public Integer getId() { public Integer getId() {
try { try {
return new Integer(fields.get(FanartProperty.id)); return new Integer(fields.get(FanartProperty.id));
@ -141,13 +123,11 @@ public class FanartTV {
return null; return null;
} }
} }
public String getName() { public String getName() {
return new File(getUrl().getFile()).getName(); return new File(getUrl().getFile()).getName();
} }
public URL getUrl() { public URL getUrl() {
try { try {
return new URL(fields.get(FanartProperty.url).replaceAll(" ", "%20")); // work around server-side url encoding issues return new URL(fields.get(FanartProperty.url).replaceAll(" ", "%20")); // work around server-side url encoding issues
@ -155,8 +135,7 @@ public class FanartTV {
return null; return null;
} }
} }
public Integer getLikes() { public Integer getLikes() {
try { try {
return new Integer(fields.get(FanartProperty.likes)); return new Integer(fields.get(FanartProperty.likes));
@ -164,8 +143,7 @@ public class FanartTV {
return null; return null;
} }
} }
public Locale getLanguage() { public Locale getLanguage() {
try { try {
return new Locale(fields.get(FanartProperty.lang)); return new Locale(fields.get(FanartProperty.lang));
@ -173,8 +151,7 @@ public class FanartTV {
return null; return null;
} }
} }
public Integer getSeason() { public Integer getSeason() {
try { try {
return new Integer(fields.get(FanartProperty.season)); return new Integer(fields.get(FanartProperty.season));
@ -182,17 +159,15 @@ public class FanartTV {
return null; return null;
} }
} }
public String getDiskType() { public String getDiskType() {
return fields.get(FanartProperty.disc_type); return fields.get(FanartProperty.disc_type);
} }
@Override @Override
public String toString() { public String toString() {
return fields.toString(); return fields.toString();
} }
} }
} }

View File

@ -1,7 +1,5 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.*; import static net.sourceforge.filebot.web.WebRequest.*;
import static net.sourceforge.tuned.XPathUtilities.*; import static net.sourceforge.tuned.XPathUtilities.*;
@ -38,59 +36,54 @@ import org.w3c.dom.Document;
import org.w3c.dom.Node; import org.w3c.dom.Node;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
public class IMDbClient implements MovieIdentificationService { public class IMDbClient implements MovieIdentificationService {
private String host = "www.imdb.com"; private String host = "www.imdb.com";
@Override @Override
public String getName() { public String getName() {
return "IMDb"; return "IMDb";
} }
@Override @Override
public Icon getIcon() { public Icon getIcon() {
return ResourceManager.getIcon("search.imdb"); return ResourceManager.getIcon("search.imdb");
} }
protected int getImdbId(String link) { protected int getImdbId(String link) {
Matcher matcher = Pattern.compile("tt(\\d{7})").matcher(link); Matcher matcher = Pattern.compile("tt(\\d{7})").matcher(link);
if (matcher.find()) { if (matcher.find()) {
return Integer.parseInt(matcher.group(1)); return Integer.parseInt(matcher.group(1));
} }
// pattern not found // pattern not found
throw new IllegalArgumentException(String.format("Cannot find imdb id: %s", link)); throw new IllegalArgumentException(String.format("Cannot find imdb id: %s", link));
} }
@Override @Override
public List<Movie> searchMovie(String query, Locale locale) throws Exception { public List<Movie> searchMovie(String query, Locale locale) throws Exception {
Document dom = parsePage(new URL("http", host, "/find?s=tt&q=" + encode(query, false))); Document dom = parsePage(new URL("http", host, "/find?s=tt&q=" + encode(query, false)));
// select movie links followed by year in parenthesis // select movie links followed by year in parenthesis
List<Node> nodes = selectNodes("//TABLE[@class='findList']//TD/A[substring-after(substring-before(following::text(),')'),'(')]", dom); List<Node> nodes = selectNodes("//TABLE[@class='findList']//TD/A[substring-after(substring-before(following::text(),')'),'(')]", dom);
List<Movie> results = new ArrayList<Movie>(nodes.size()); List<Movie> results = new ArrayList<Movie>(nodes.size());
for (Node node : nodes) { for (Node node : nodes) {
try { try {
String name = node.getTextContent().trim(); String name = node.getTextContent().trim();
if (name.startsWith("\"")) if (name.startsWith("\""))
continue; continue;
String year = node.getNextSibling().getTextContent().trim().replaceFirst("^\\(I\\)", "").replaceAll("[\\p{Punct}\\p{Space}]+", ""); // remove non-number characters String year = node.getNextSibling().getTextContent().trim().replaceFirst("^\\(I\\)", "").replaceAll("[\\p{Punct}\\p{Space}]+", ""); // remove non-number characters
String href = getAttribute("href", node); String href = getAttribute("href", node);
results.add(new Movie(name, Integer.parseInt(year), getImdbId(href), -1)); results.add(new Movie(name, Integer.parseInt(year), getImdbId(href), -1));
} catch (Exception e) { } catch (Exception e) {
// ignore illegal movies (TV Shows, Videos, Video Games, etc) // ignore illegal movies (TV Shows, Videos, Video Games, etc)
} }
} }
// we might have been redirected to the movie page // we might have been redirected to the movie page
if (results.isEmpty()) { if (results.isEmpty()) {
try { try {
@ -103,28 +96,26 @@ public class IMDbClient implements MovieIdentificationService {
// ignore, can't find movie // ignore, can't find movie
} }
} }
return results; return results;
} }
protected Movie scrapeMovie(Document dom, Locale locale) { protected Movie scrapeMovie(Document dom, Locale locale) {
try { try {
int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom)); int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom));
String title = selectString("//META[@property='og:title']/@content", dom); String title = selectString("//META[@property='og:title']/@content", dom);
Matcher titleMatcher = Pattern.compile("(.+)\\s\\((?i:tv.|video.)?(\\d{4})\\)$").matcher(title); Matcher titleMatcher = Pattern.compile("(.+)\\s\\((?i:tv.|video.)?(\\d{4})\\)$").matcher(title);
if (!titleMatcher.matches()) if (!titleMatcher.matches())
return null; return null;
return new Movie(titleMatcher.group(1), Integer.parseInt(titleMatcher.group(2)), imdbid, -1); return new Movie(titleMatcher.group(1), Integer.parseInt(titleMatcher.group(2)), imdbid, -1);
} catch (Exception e) { } catch (Exception e) {
// ignore, we probably got redirected to an error page // ignore, we probably got redirected to an error page
return null; return null;
} }
} }
@Override @Override
public Movie getMovieDescriptor(int imdbid, Locale locale) throws Exception { public Movie getMovieDescriptor(int imdbid, Locale locale) throws Exception {
try { try {
@ -133,56 +124,50 @@ public class IMDbClient implements MovieIdentificationService {
return null; // illegal imdbid return null; // illegal imdbid
} }
} }
protected Document parsePage(URL url) throws IOException, SAXException { protected Document parsePage(URL url) throws IOException, SAXException {
CachedPage page = new CachedPage(url) { CachedPage page = new CachedPage(url) {
@Override @Override
protected Reader openConnection(URL url) throws IOException { protected Reader openConnection(URL url) throws IOException {
URLConnection connection = url.openConnection(); URLConnection connection = url.openConnection();
// IMDb refuses default user agent (Java/1.6.0_12) => SPOOF GOOGLEBOT // IMDb refuses default user agent (Java/1.6.0_12) => SPOOF GOOGLEBOT
connection.addRequestProperty("User-Agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"); connection.addRequestProperty("User-Agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)");
connection.addRequestProperty("From", "googlebot(at)googlebot.com"); connection.addRequestProperty("From", "googlebot(at)googlebot.com");
connection.addRequestProperty("Accept", "*/*"); connection.addRequestProperty("Accept", "*/*");
connection.addRequestProperty("X-Forwarded-For", "66.249.73.100"); // TRICK ANNOYING IMDB GEO-LOCATION LOCALIZATION connection.addRequestProperty("X-Forwarded-For", "66.249.73.100"); // TRICK ANNOYING IMDB GEO-LOCATION LOCALIZATION
return getReader(connection); return getReader(connection);
} }
}; };
return getHtmlDocument(page.get()); return getHtmlDocument(page.get());
} }
public String scrape(String imdbid, String xpath) throws IOException, SAXException { public String scrape(String imdbid, String xpath) throws IOException, SAXException {
return scrape(getMoviePageLink(getImdbId(imdbid)).toURL(), xpath); // helper for scraping data in user scripts return scrape(getMoviePageLink(getImdbId(imdbid)).toURL(), xpath); // helper for scraping data in user scripts
} }
public String scrape(URL url, String xpath) throws IOException, SAXException { public String scrape(URL url, String xpath) throws IOException, SAXException {
return selectString(xpath, parsePage(url)); // helper for scraping data in user scripts return selectString(xpath, parsePage(url)); // helper for scraping data in user scripts
} }
public URI getMoviePageLink(int imdbId) { public URI getMoviePageLink(int imdbId) {
return URI.create(String.format("http://www.imdb.com/title/tt%07d/", imdbId)); return URI.create(String.format("http://www.imdb.com/title/tt%07d/", imdbId));
} }
@Override @Override
public Map<File, Movie> getMovieDescriptors(Collection<File> movieFiles, Locale locale) throws Exception { public Map<File, Movie> getMovieDescriptors(Collection<File> movieFiles, Locale locale) throws Exception {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@SuppressWarnings({ "unchecked", "rawtypes" }) @SuppressWarnings({ "unchecked", "rawtypes" })
public Map<String, String> getImdbApiData(Integer i, String t, String y, boolean tomatoes) throws IOException { public Map<String, String> getImdbApiData(Integer i, String t, String y, boolean tomatoes) throws IOException {
// e.g. http://www.imdbapi.com/?i=tt0379786&r=xml&tomatoes=true // e.g. http://www.imdbapi.com/?i=tt0379786&r=xml&tomatoes=true
String url = String.format("http://www.omdbapi.com/?i=%s&t=%s&y=%s&r=xml&tomatoes=%s", String.format(i == null ? "" : "tt%07d", i), t, y, tomatoes); String url = String.format("http://www.omdbapi.com/?i=%s&t=%s&y=%s&r=xml&tomatoes=%s", String.format(i == null ? "" : "tt%07d", i), t, y, tomatoes);
CachedResource<HashMap> data = new CachedResource<HashMap>(url, HashMap.class) { CachedResource<HashMap> data = new CachedResource<HashMap>(url, HashMap.class) {
@Override @Override
public HashMap process(ByteBuffer data) throws Exception { public HashMap process(ByteBuffer data) throws Exception {
Document xml = getDocument(Charset.forName("UTF-8").decode(data).toString()); Document xml = getDocument(Charset.forName("UTF-8").decode(data).toString());
@ -192,26 +177,24 @@ public class IMDbClient implements MovieIdentificationService {
} }
return attr; return attr;
} }
@Override @Override
protected Cache getCache() { protected Cache getCache() {
return CacheManager.getInstance().getCache("web-datasource"); return CacheManager.getInstance().getCache("web-datasource-lv2");
} }
}; };
return data.get(); return data.get();
} }
public MovieInfo getImdbApiMovieInfo(Movie movie) throws IOException { public MovieInfo getImdbApiMovieInfo(Movie movie) throws IOException {
Map<String, String> data = movie.getImdbId() > 0 ? getImdbApiData(movie.getImdbId(), "", "", false) : getImdbApiData(null, movie.getName(), String.valueOf(movie.getYear()), false); Map<String, String> data = movie.getImdbId() > 0 ? getImdbApiData(movie.getImdbId(), "", "", false) : getImdbApiData(null, movie.getName(), String.valueOf(movie.getYear()), false);
// sanity check // sanity check
if (!Boolean.parseBoolean(data.get("response"))) { if (!Boolean.parseBoolean(data.get("response"))) {
throw new IllegalArgumentException("Movie not found: " + data); throw new IllegalArgumentException("Movie not found: " + data);
} }
Map<MovieProperty, String> fields = new EnumMap<MovieProperty, String>(MovieProperty.class); Map<MovieProperty, String> fields = new EnumMap<MovieProperty, String>(MovieProperty.class);
fields.put(MovieProperty.title, data.get("title")); fields.put(MovieProperty.title, data.get("title"));
fields.put(MovieProperty.certification, data.get("rated")); fields.put(MovieProperty.certification, data.get("rated"));
@ -221,7 +204,7 @@ public class IMDbClient implements MovieIdentificationService {
fields.put(MovieProperty.vote_count, data.get("imdbVotes").replaceAll("\\D", "")); fields.put(MovieProperty.vote_count, data.get("imdbVotes").replaceAll("\\D", ""));
fields.put(MovieProperty.imdb_id, data.get("imdbID")); fields.put(MovieProperty.imdb_id, data.get("imdbID"));
fields.put(MovieProperty.poster_path, data.get("poster")); fields.put(MovieProperty.poster_path, data.get("poster"));
// convert release date to yyyy-MM-dd // convert release date to yyyy-MM-dd
Date released = Date.parse(data.get("released"), "dd MMM yyyy"); Date released = Date.parse(data.get("released"), "dd MMM yyyy");
if (released != null) { if (released != null) {
@ -232,25 +215,25 @@ public class IMDbClient implements MovieIdentificationService {
fields.put(MovieProperty.release_date, year.format("yyyy-MM-dd")); fields.put(MovieProperty.release_date, year.format("yyyy-MM-dd"));
} }
} }
List<String> genres = new ArrayList<String>(); List<String> genres = new ArrayList<String>();
for (String it : data.get("genre").split(",")) { for (String it : data.get("genre").split(",")) {
genres.add(it.trim()); genres.add(it.trim());
} }
List<Person> actors = new ArrayList<Person>(); List<Person> actors = new ArrayList<Person>();
for (String it : data.get("actors").split(",")) { for (String it : data.get("actors").split(",")) {
actors.add(new Person(it.trim(), null, null)); actors.add(new Person(it.trim(), null, null));
} }
for (String director : data.get("director").split(",")) { for (String director : data.get("director").split(",")) {
actors.add(new Person(director, null, "Director")); actors.add(new Person(director, null, "Director"));
} }
for (String writer : data.get("writer").split(",")) { for (String writer : data.get("writer").split(",")) {
actors.add(new Person(writer, null, "Writer")); actors.add(new Person(writer, null, "Writer"));
} }
return new MovieInfo(fields, genres, new ArrayList<String>(0), actors, new ArrayList<Trailer>(0)); return new MovieInfo(fields, genres, new ArrayList<String>(0), actors, new ArrayList<Trailer>(0));
} }
} }