From 198b8b0e06b250d35c647de79d794a38c7d0df3c Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Fri, 29 Nov 2013 04:29:56 +0000 Subject: [PATCH] * separate long-term caches that have different update frequencies --- source/ehcache.xml | 18 +++- .../filebot/web/CachedXmlResource.java | 2 +- .../filebot/web/ETagCachedResource.java | 2 +- .../net/sourceforge/filebot/web/FanartTV.java | 89 ++++++----------- .../sourceforge/filebot/web/IMDbClient.java | 97 ++++++++----------- 5 files changed, 90 insertions(+), 118 deletions(-) diff --git a/source/ehcache.xml b/source/ehcache.xml index bcf263b7..0d1e2b29 100644 --- a/source/ehcache.xml +++ b/source/ehcache.xml @@ -46,12 +46,26 @@ memoryStoreEvictionPolicy="LRU" /> + + + { @Override protected Cache getCache() { - return CacheManager.getInstance().getCache("web-persistent-datasource"); + return CacheManager.getInstance().getCache("web-datasource-lv3"); } public Document getDocument() throws IOException { diff --git a/source/net/sourceforge/filebot/web/ETagCachedResource.java b/source/net/sourceforge/filebot/web/ETagCachedResource.java index 7a998446..ace6186d 100644 --- a/source/net/sourceforge/filebot/web/ETagCachedResource.java +++ b/source/net/sourceforge/filebot/web/ETagCachedResource.java @@ -45,7 +45,7 @@ public abstract class ETagCachedResource extends CachedR @Override protected Cache getCache() { - return CacheManager.getInstance().getCache("web-persistent-datasource"); + return CacheManager.getInstance().getCache("web-datasource-lv3"); } } diff --git a/source/net/sourceforge/filebot/web/FanartTV.java b/source/net/sourceforge/filebot/web/FanartTV.java index ecfd8cb6..fb6a3628 100644 --- a/source/net/sourceforge/filebot/web/FanartTV.java +++ b/source/net/sourceforge/filebot/web/FanartTV.java @@ -1,7 +1,5 @@ - package net.sourceforge.filebot.web; - import static net.sourceforge.filebot.web.WebRequest.*; import static net.sourceforge.tuned.XPathUtilities.*; @@ -25,47 +23,40 @@ import net.sourceforge.filebot.web.FanartTV.FanartDescriptor.FanartProperty; import org.w3c.dom.Document; import org.w3c.dom.Node; - public class FanartTV { - + private String apikey; - - + public FanartTV(String apikey) { this.apikey = apikey; } - - + public List getSeriesArtwork(int tvdbid) throws Exception { return getSeriesArtwork(String.valueOf(tvdbid), "all", 1, 2); } - - + public List getSeriesArtwork(String id, String type, int sort, int limit) throws Exception { return getArtwork("series", id, type, sort, limit); } - - + public List getMovieArtwork(int tmdbid) throws Exception { return getMovieArtwork(String.valueOf(tmdbid), "all", 1, 2); } - - + public List getMovieArtwork(String id, String type, int sort, int limit) throws Exception { return getArtwork("movie", id, type, sort, limit); } - - + public List getArtwork(String category, String id, String type, int sort, int limit) throws Exception { String resource = getResource(category, id, "xml", type, sort, limit); - + // cache results CachedResource data = new CachedResource(resource, FanartDescriptor[].class) { - + @Override public FanartDescriptor[] process(ByteBuffer data) throws Exception { Document dom = getDocument(Charset.forName("UTF-8").decode(data).toString()); - + List fanart = new ArrayList(); for (Node node : selectNodes("//*[@url]", dom)) { // e.g. @@ -79,61 +70,52 @@ public class FanartTV { } fanart.add(new FanartDescriptor(fields)); } - + return fanart.toArray(new FanartDescriptor[0]); } - - + @Override protected Cache getCache() { - return CacheManager.getInstance().getCache("web-datasource"); + return CacheManager.getInstance().getCache("web-datasource-lv2"); } }; - + return Arrays.asList(data.get()); } - - + public String getResource(String category, String id, String format, String type, int sort, int limit) throws MalformedURLException { // e.g. http://fanart.tv/webservice/series/780b986b22c35e6f7a134a2f392c2deb/70327/xml/all/1/2 return String.format("http://api.fanart.tv/webservice/%s/%s/%s/%s/%s/%s/%s", category, apikey, id, format, type, sort, limit); } - - + public static class FanartDescriptor implements Serializable { - + public static enum FanartProperty { type, id, url, lang, likes, season, disc_type } - + protected Map fields; - - + protected FanartDescriptor() { // used by serializer } - - + protected FanartDescriptor(Map fields) { this.fields = new EnumMap(fields); } - - + public String get(Object key) { return fields.get(FanartProperty.valueOf(key.toString())); } - - + public String get(FanartProperty key) { return fields.get(key); } - - + public String getType() { return fields.get(FanartProperty.type); } - - + public Integer getId() { try { return new Integer(fields.get(FanartProperty.id)); @@ -141,13 +123,11 @@ public class FanartTV { return null; } } - - + public String getName() { return new File(getUrl().getFile()).getName(); } - - + public URL getUrl() { try { return new URL(fields.get(FanartProperty.url).replaceAll(" ", "%20")); // work around server-side url encoding issues @@ -155,8 +135,7 @@ public class FanartTV { return null; } } - - + public Integer getLikes() { try { return new Integer(fields.get(FanartProperty.likes)); @@ -164,8 +143,7 @@ public class FanartTV { return null; } } - - + public Locale getLanguage() { try { return new Locale(fields.get(FanartProperty.lang)); @@ -173,8 +151,7 @@ public class FanartTV { return null; } } - - + public Integer getSeason() { try { return new Integer(fields.get(FanartProperty.season)); @@ -182,17 +159,15 @@ public class FanartTV { return null; } } - - + public String getDiskType() { return fields.get(FanartProperty.disc_type); } - - + @Override public String toString() { return fields.toString(); } } - + } diff --git a/source/net/sourceforge/filebot/web/IMDbClient.java b/source/net/sourceforge/filebot/web/IMDbClient.java index afec9b7e..76b44bbc 100644 --- a/source/net/sourceforge/filebot/web/IMDbClient.java +++ b/source/net/sourceforge/filebot/web/IMDbClient.java @@ -1,7 +1,5 @@ - package net.sourceforge.filebot.web; - import static net.sourceforge.filebot.web.WebRequest.*; import static net.sourceforge.tuned.XPathUtilities.*; @@ -38,59 +36,54 @@ import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.SAXException; - public class IMDbClient implements MovieIdentificationService { - + private String host = "www.imdb.com"; - - + @Override public String getName() { return "IMDb"; } - - + @Override public Icon getIcon() { return ResourceManager.getIcon("search.imdb"); } - - + protected int getImdbId(String link) { Matcher matcher = Pattern.compile("tt(\\d{7})").matcher(link); - + if (matcher.find()) { return Integer.parseInt(matcher.group(1)); } - + // pattern not found throw new IllegalArgumentException(String.format("Cannot find imdb id: %s", link)); } - - + @Override public List searchMovie(String query, Locale locale) throws Exception { Document dom = parsePage(new URL("http", host, "/find?s=tt&q=" + encode(query, false))); - + // select movie links followed by year in parenthesis List nodes = selectNodes("//TABLE[@class='findList']//TD/A[substring-after(substring-before(following::text(),')'),'(')]", dom); List results = new ArrayList(nodes.size()); - + for (Node node : nodes) { try { String name = node.getTextContent().trim(); if (name.startsWith("\"")) continue; - + String year = node.getNextSibling().getTextContent().trim().replaceFirst("^\\(I\\)", "").replaceAll("[\\p{Punct}\\p{Space}]+", ""); // remove non-number characters String href = getAttribute("href", node); - + results.add(new Movie(name, Integer.parseInt(year), getImdbId(href), -1)); } catch (Exception e) { // ignore illegal movies (TV Shows, Videos, Video Games, etc) } } - + // we might have been redirected to the movie page if (results.isEmpty()) { try { @@ -103,28 +96,26 @@ public class IMDbClient implements MovieIdentificationService { // ignore, can't find movie } } - + return results; } - - + protected Movie scrapeMovie(Document dom, Locale locale) { try { int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom)); String title = selectString("//META[@property='og:title']/@content", dom); - + Matcher titleMatcher = Pattern.compile("(.+)\\s\\((?i:tv.|video.)?(\\d{4})\\)$").matcher(title); if (!titleMatcher.matches()) return null; - + return new Movie(titleMatcher.group(1), Integer.parseInt(titleMatcher.group(2)), imdbid, -1); } catch (Exception e) { // ignore, we probably got redirected to an error page return null; } } - - + @Override public Movie getMovieDescriptor(int imdbid, Locale locale) throws Exception { try { @@ -133,56 +124,50 @@ public class IMDbClient implements MovieIdentificationService { return null; // illegal imdbid } } - - + protected Document parsePage(URL url) throws IOException, SAXException { CachedPage page = new CachedPage(url) { - + @Override protected Reader openConnection(URL url) throws IOException { URLConnection connection = url.openConnection(); - + // IMDb refuses default user agent (Java/1.6.0_12) => SPOOF GOOGLEBOT connection.addRequestProperty("User-Agent", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"); connection.addRequestProperty("From", "googlebot(at)googlebot.com"); connection.addRequestProperty("Accept", "*/*"); connection.addRequestProperty("X-Forwarded-For", "66.249.73.100"); // TRICK ANNOYING IMDB GEO-LOCATION LOCALIZATION - + return getReader(connection); } }; - + return getHtmlDocument(page.get()); } - - + public String scrape(String imdbid, String xpath) throws IOException, SAXException { return scrape(getMoviePageLink(getImdbId(imdbid)).toURL(), xpath); // helper for scraping data in user scripts } - - + public String scrape(URL url, String xpath) throws IOException, SAXException { return selectString(xpath, parsePage(url)); // helper for scraping data in user scripts } - - + public URI getMoviePageLink(int imdbId) { return URI.create(String.format("http://www.imdb.com/title/tt%07d/", imdbId)); } - - + @Override public Map getMovieDescriptors(Collection movieFiles, Locale locale) throws Exception { throw new UnsupportedOperationException(); } - - + @SuppressWarnings({ "unchecked", "rawtypes" }) public Map getImdbApiData(Integer i, String t, String y, boolean tomatoes) throws IOException { // e.g. http://www.imdbapi.com/?i=tt0379786&r=xml&tomatoes=true String url = String.format("http://www.omdbapi.com/?i=%s&t=%s&y=%s&r=xml&tomatoes=%s", String.format(i == null ? "" : "tt%07d", i), t, y, tomatoes); CachedResource data = new CachedResource(url, HashMap.class) { - + @Override public HashMap process(ByteBuffer data) throws Exception { Document xml = getDocument(Charset.forName("UTF-8").decode(data).toString()); @@ -192,26 +177,24 @@ public class IMDbClient implements MovieIdentificationService { } return attr; } - - + @Override protected Cache getCache() { - return CacheManager.getInstance().getCache("web-datasource"); + return CacheManager.getInstance().getCache("web-datasource-lv2"); } }; - + return data.get(); } - - + public MovieInfo getImdbApiMovieInfo(Movie movie) throws IOException { Map data = movie.getImdbId() > 0 ? getImdbApiData(movie.getImdbId(), "", "", false) : getImdbApiData(null, movie.getName(), String.valueOf(movie.getYear()), false); - + // sanity check if (!Boolean.parseBoolean(data.get("response"))) { throw new IllegalArgumentException("Movie not found: " + data); } - + Map fields = new EnumMap(MovieProperty.class); fields.put(MovieProperty.title, data.get("title")); fields.put(MovieProperty.certification, data.get("rated")); @@ -221,7 +204,7 @@ public class IMDbClient implements MovieIdentificationService { fields.put(MovieProperty.vote_count, data.get("imdbVotes").replaceAll("\\D", "")); fields.put(MovieProperty.imdb_id, data.get("imdbID")); fields.put(MovieProperty.poster_path, data.get("poster")); - + // convert release date to yyyy-MM-dd Date released = Date.parse(data.get("released"), "dd MMM yyyy"); if (released != null) { @@ -232,25 +215,25 @@ public class IMDbClient implements MovieIdentificationService { fields.put(MovieProperty.release_date, year.format("yyyy-MM-dd")); } } - + List genres = new ArrayList(); for (String it : data.get("genre").split(",")) { genres.add(it.trim()); } - + List actors = new ArrayList(); for (String it : data.get("actors").split(",")) { actors.add(new Person(it.trim(), null, null)); } - + for (String director : data.get("director").split(",")) { actors.add(new Person(director, null, "Director")); } - + for (String writer : data.get("writer").split(",")) { actors.add(new Person(writer, null, "Writer")); } - + return new MovieInfo(fields, genres, new ArrayList(0), actors, new ArrayList(0)); } }