From c227ec4bd97fa896f710e775d5a4bd608699cd7d Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Sat, 7 Sep 2013 15:48:24 +0000 Subject: [PATCH] + support n-alias data files --- BuildData.groovy | 9 +- .../net/sourceforge/filebot/WebServices.java | 10 +- .../filebot/media/MediaDetection.java | 13 +- .../filebot/media/ReleaseInfo.java | 47 +++---- .../sourceforge/filebot/web/AnidbClient.java | 115 ++++++++---------- .../filebot/web/AnidbSearchResult.java | 8 +- .../sourceforge/filebot/web/LocalSearch.java | 70 +++++------ .../sourceforge/filebot/web/SearchResult.java | 17 +++ .../filebot/web/SerienjunkiesClient.java | 107 ++++++++-------- .../web/SerienjunkiesSearchResult.java | 53 ++------ .../net/sourceforge/tuned/FileUtilities.java | 13 ++ 11 files changed, 222 insertions(+), 240 deletions(-) diff --git a/BuildData.groovy b/BuildData.groovy index 594cebf1..c03be0e8 100644 --- a/BuildData.groovy +++ b/BuildData.groovy @@ -157,10 +157,15 @@ if (thetvdb_txt.size() < 30000) { throw new Exception('TheTVDB index sanity fail // BUILD anidb-index.gz def anidb = new net.sourceforge.filebot.web.AnidbClient(null, 0).getAnimeTitles() -def anidb_index = anidb.findResults{ [it.getAnimeId(), it.getPrimaryTitle(), it.getEnglishTitle()] } +def anidb_index = anidb.findResults{ + def row = [] + row += it.getAnimeId().pad(5) + row += it.names*.replaceAll(/\s+/, ' ')*.replaceAll(/['`´‘’ʻ]+/, /'/)*.trim().unique() + return row +} // join and sort -def anidb_txt = anidb_index.findResults{ [it[0].pad(5), it[1] ?: '', it[2] == null || it[2].equals(it[1]) ? '' : it[2]]*.replaceAll(/\s+/, ' ')*.trim().join('\t').replaceAll(/['`´‘’ʻ]+/, /'/) }.sort().unique() +def anidb_txt = anidb_index.findResults{ row -> row.join('\t') }.sort().unique() pack(anidb_out, anidb_txt) println "AniDB Index: " + anidb_txt.size() diff --git a/source/net/sourceforge/filebot/WebServices.java b/source/net/sourceforge/filebot/WebServices.java index 65cb8039..47b3f010 100644 --- a/source/net/sourceforge/filebot/WebServices.java +++ b/source/net/sourceforge/filebot/WebServices.java @@ -2,9 +2,11 @@ package net.sourceforge.filebot; -import static java.util.Arrays.*; -import static java.util.Collections.*; -import static net.sourceforge.filebot.Settings.*; +import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; +import static net.sourceforge.filebot.Settings.getApplicationName; +import static net.sourceforge.filebot.Settings.getApplicationProperty; +import static net.sourceforge.filebot.Settings.getApplicationVersion; import java.io.IOException; import java.util.ArrayList; @@ -142,7 +144,7 @@ public final class WebServices { @Override protected Set getFields(SearchResult object) { - return set(object.getName()); + return set(object.getNames()); } }; diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index aa376e51..01164ec2 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -61,7 +61,6 @@ import net.sourceforge.filebot.similarity.SequenceMatchSimilarity; import net.sourceforge.filebot.similarity.SeriesNameMatcher; import net.sourceforge.filebot.similarity.SimilarityComparator; import net.sourceforge.filebot.similarity.SimilarityMetric; -import net.sourceforge.filebot.web.AnidbSearchResult; import net.sourceforge.filebot.web.Date; import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Movie; @@ -377,13 +376,11 @@ public class MediaDetection { public static synchronized List> getSeriesIndex() throws IOException { if (seriesIndex.isEmpty()) { try { - for (TheTVDBSearchResult it : releaseInfo.getTheTVDBIndex()) { - seriesIndex.add(new SimpleEntry(normalizePunctuation(it.getName()).toLowerCase(), it)); - } - for (AnidbSearchResult it : releaseInfo.getAnidbIndex()) { - seriesIndex.add(new SimpleEntry(normalizePunctuation(it.getPrimaryTitle()).toLowerCase(), it)); - if (it.getEnglishTitle() != null) { - seriesIndex.add(new SimpleEntry(normalizePunctuation(it.getEnglishTitle()).toLowerCase(), it)); + for (SearchResult[] index : new SearchResult[][] { releaseInfo.getTheTVDBIndex(), releaseInfo.getAnidbIndex() }) { + for (SearchResult item : index) { + for (String name : item.getNames()) { + seriesIndex.add(new SimpleEntry(normalizePunctuation(name).toLowerCase(), item)); + } } } } catch (Exception e) { diff --git a/source/net/sourceforge/filebot/media/ReleaseInfo.java b/source/net/sourceforge/filebot/media/ReleaseInfo.java index 1d65c864..b145e0d0 100644 --- a/source/net/sourceforge/filebot/media/ReleaseInfo.java +++ b/source/net/sourceforge/filebot/media/ReleaseInfo.java @@ -1,12 +1,15 @@ package net.sourceforge.filebot.media; +import static java.lang.Integer.parseInt; import static java.util.Arrays.asList; +import static java.util.Arrays.copyOfRange; import static java.util.Collections.unmodifiableMap; import static java.util.ResourceBundle.getBundle; import static java.util.regex.Pattern.CASE_INSENSITIVE; import static java.util.regex.Pattern.UNICODE_CASE; import static java.util.regex.Pattern.compile; import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation; +import static net.sourceforge.tuned.FileUtilities.readCSV; import static net.sourceforge.tuned.StringUtilities.join; import java.io.File; @@ -26,7 +29,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Scanner; import java.util.Set; import java.util.TreeMap; import java.util.regex.Matcher; @@ -274,14 +276,15 @@ public class ReleaseInfo { @Override public Movie[] process(ByteBuffer data) throws IOException { - Scanner scanner = new Scanner(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n"); + List rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t"); + List movies = new ArrayList(rows.size()); - List movies = new ArrayList(); - while (scanner.hasNext()) { - int imdbid = scanner.nextInt(); - String name = scanner.next().trim(); - int year = scanner.nextInt(); - movies.add(new Movie(name, year, imdbid, -1)); + for (String[] row : rows) { + int imdbid = parseInt(row[0]); + int year = parseInt(row[1]); + String name = row[2]; + String[] aliasNames = copyOfRange(row, 3, row.length); + movies.add(new Movie(name, aliasNames, year, imdbid, -1)); } return movies.toArray(new Movie[0]); @@ -296,13 +299,14 @@ public class ReleaseInfo { @Override public TheTVDBSearchResult[] process(ByteBuffer data) throws IOException { - Scanner scanner = new Scanner(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n"); + List rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t"); + List tvshows = new ArrayList(rows.size()); - List tvshows = new ArrayList(); - while (scanner.hasNext() && scanner.hasNextInt()) { - int id = scanner.nextInt(); - String name = scanner.next().trim(); - tvshows.add(new TheTVDBSearchResult(name, id)); + for (String[] row : rows) { + int id = parseInt(row[0]); + String name = row[1]; + String[] aliasNames = copyOfRange(row, 2, row.length); + tvshows.add(new TheTVDBSearchResult(name, aliasNames, id)); } return tvshows.toArray(new TheTVDBSearchResult[0]); @@ -317,15 +321,14 @@ public class ReleaseInfo { @Override public AnidbSearchResult[] process(ByteBuffer data) throws IOException { - Scanner scanner = new Scanner(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n"); + List rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t"); + List anime = new ArrayList(rows.size()); - List anime = new ArrayList(); - while (scanner.hasNext() && scanner.hasNextInt()) { - int aid = scanner.nextInt(); - String primaryTitle = scanner.next().trim(); - String englishTitle = scanner.next().trim(); - - anime.add(new AnidbSearchResult(aid, primaryTitle, englishTitle.isEmpty() ? null : englishTitle)); + for (String[] row : rows) { + int aid = parseInt(row[0]); + String primaryTitle = row[1]; + String[] aliasNames = copyOfRange(row, 2, row.length); + anime.add(new AnidbSearchResult(aid, primaryTitle, aliasNames)); } return anime.toArray(new AnidbSearchResult[0]); diff --git a/source/net/sourceforge/filebot/web/AnidbClient.java b/source/net/sourceforge/filebot/web/AnidbClient.java index bce7c2b9..e496b640 100644 --- a/source/net/sourceforge/filebot/web/AnidbClient.java +++ b/source/net/sourceforge/filebot/web/AnidbClient.java @@ -1,10 +1,12 @@ - package net.sourceforge.filebot.web; - -import static net.sourceforge.filebot.web.EpisodeUtilities.*; -import static net.sourceforge.filebot.web.WebRequest.*; -import static net.sourceforge.tuned.XPathUtilities.*; +import static net.sourceforge.filebot.web.EpisodeUtilities.sortEpisodes; +import static net.sourceforge.filebot.web.WebRequest.getDocument; +import static net.sourceforge.tuned.XPathUtilities.getAttribute; +import static net.sourceforge.tuned.XPathUtilities.getChild; +import static net.sourceforge.tuned.XPathUtilities.getTextContent; +import static net.sourceforge.tuned.XPathUtilities.selectNodes; +import static net.sourceforge.tuned.XPathUtilities.selectString; import java.net.URI; import java.net.URISyntaxException; @@ -30,108 +32,98 @@ import net.sourceforge.filebot.ResourceManager; import org.w3c.dom.Document; import org.w3c.dom.Node; - public class AnidbClient extends AbstractEpisodeListProvider { - + private static final FloodLimit REQUEST_LIMIT = new FloodLimit(5, 12, TimeUnit.SECONDS); // no more than 5 requests within a 10 second window (+2 seconds for good measure) - + private final String host = "anidb.net"; - + private final String client; private final int clientver; - - + public AnidbClient(String client, int clientver) { this.client = client; this.clientver = clientver; } - - + @Override public String getName() { return "AniDB"; } - - + @Override public Icon getIcon() { return ResourceManager.getIcon("search.anidb"); } - - + @Override public boolean hasSingleSeasonSupport() { return false; } - - + @Override public boolean hasLocaleSupport() { return true; } - - + @Override public ResultCache getCache() { return new ResultCache(host, Cache.getCache("web-datasource-lv2")); } - - + @Override public List search(String query, final Locale locale) throws Exception { // bypass automatic caching since search is based on locally cached data anyway return fetchSearchResult(query, locale); } - - + @Override public List fetchSearchResult(String query, final Locale locale) throws Exception { - LocalSearch index = new LocalSearch(getAnimeTitles()) { - + LocalSearch index = new LocalSearch(getAnimeTitles()) { + @Override - protected Set getFields(AnidbSearchResult anime) { - return set(anime.getPrimaryTitle(), anime.getEnglishTitle()); + protected Set getFields(SearchResult it) { + return set(it.getNames()); } }; - + return new ArrayList(index.search(query)); } - - + @Override public List fetchEpisodeList(SearchResult searchResult, SortOrder sortOrder, Locale language) throws Exception { AnidbSearchResult anime = (AnidbSearchResult) searchResult; - + // e.g. http://api.anidb.net:9001/httpapi?request=anime&client=filebot&clientver=1&protover=1&aid=4521 URL url = new URL("http", "api." + host, 9001, "/httpapi?request=anime&client=" + client + "&clientver=" + clientver + "&protover=1&aid=" + anime.getAnimeId()); - + // respect flood protection limits REQUEST_LIMIT.acquirePermit(); - + // get anime page as xml Document dom = getDocument(url); - + // select main title and anime start date Date seriesStartDate = Date.parse(selectString("//startdate", dom), "yyyy-MM-dd"); String animeTitle = selectString("//titles/title[@type='official' and @lang='" + language.getLanguage() + "']", dom); if (animeTitle.isEmpty()) { animeTitle = selectString("//titles/title[@type='main']", dom); } - + List episodes = new ArrayList(25); - + for (Node node : selectNodes("//episode", dom)) { Node epno = getChild("epno", node); int number = Integer.parseInt(getTextContent(epno).replaceAll("\\D", "")); int type = Integer.parseInt(getAttribute("type", epno)); - + if (type == 1 || type == 2) { Date airdate = Date.parse(getTextContent("airdate", node), "yyyy-MM-dd"); String title = selectString(".//title[@lang='" + language.getLanguage() + "']", node); if (title.isEmpty()) { // English language fall-back title = selectString(".//title[@lang='en']", node); } - + if (type == 1) { episodes.add(new Episode(animeTitle, seriesStartDate, null, number, title, number, null, airdate, searchResult)); // normal episode, no seasons for anime } else { @@ -139,20 +131,19 @@ public class AnidbClient extends AbstractEpisodeListProvider { } } } - + // make sure episodes are in ordered correctly sortEpisodes(episodes); - - // sanity check + + // sanity check if (episodes.isEmpty()) { // anime page xml doesn't work sometimes throw new RuntimeException(String.format("Failed to parse episode data from xml: %s (%d)", anime, anime.getAnimeId())); } - + return episodes; } - - + @Override public URI getEpisodeListLink(SearchResult searchResult) { try { @@ -161,39 +152,38 @@ public class AnidbClient extends AbstractEpisodeListProvider { throw new RuntimeException(e); } } - - + public synchronized List getAnimeTitles() throws Exception { URL url = new URL("http", host, "/api/anime-titles.dat.gz"); ResultCache cache = getCache(); - + @SuppressWarnings("unchecked") List anime = (List) cache.getSearchResult(null, Locale.ROOT); if (anime != null) { return anime; } - + // ||| // type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language) Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$"); - + Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>(); Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>(); Map<Integer, Map<String, String>> synonymsTitleMap = new HashMap<Integer, Map<String, String>>(); - + // fetch data Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8"); - + try { while (scanner.hasNextLine()) { Matcher matcher = pattern.matcher(scanner.nextLine()); - + if (matcher.matches()) { int aid = Integer.parseInt(matcher.group(1)); String type = matcher.group(2); String language = matcher.group(3); String title = matcher.group(4); - + if (type.equals("1")) { primaryTitleMap.put(aid, title); } else if (type.equals("2") || type.equals("4")) { @@ -203,7 +193,7 @@ public class AnidbClient extends AbstractEpisodeListProvider { languageTitleMap = new HashMap<String, String>(); titleMap.put(aid, languageTitleMap); } - + languageTitleMap.put(language, title); } } @@ -211,10 +201,10 @@ public class AnidbClient extends AbstractEpisodeListProvider { } finally { scanner.close(); } - + // build up a list of all possible AniDB search results anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size()); - + for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) { Map<String, String> localizedTitles = new HashMap<String, String>(); if (synonymsTitleMap.containsKey(entry.getKey())) { @@ -223,12 +213,13 @@ public class AnidbClient extends AbstractEpisodeListProvider { if (officialTitleMap.containsKey(entry.getKey())) { localizedTitles.putAll(officialTitleMap.get(entry.getKey())); // primarily use official title if available } - - anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), localizedTitles.get("en"))); + + String englishTitle = localizedTitles.get("en"); // ONLY SUPPORT ENGLISH LOCALIZATION + anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitle == null || englishTitle.isEmpty() ? new String[] {} : new String[] { englishTitle })); } - + // populate cache return cache.putSearchResult(null, Locale.ROOT, anime); } - + } diff --git a/source/net/sourceforge/filebot/web/AnidbSearchResult.java b/source/net/sourceforge/filebot/web/AnidbSearchResult.java index f942f4e3..63bfea52 100644 --- a/source/net/sourceforge/filebot/web/AnidbSearchResult.java +++ b/source/net/sourceforge/filebot/web/AnidbSearchResult.java @@ -8,8 +8,8 @@ public class AnidbSearchResult extends SearchResult { // used by serializer } - public AnidbSearchResult(int aid, String primaryTitle, String englishTitle) { - super(primaryTitle, englishTitle); + public AnidbSearchResult(int aid, String primaryTitle, String[] localizedTitles) { + super(primaryTitle, localizedTitles); this.aid = aid; } @@ -30,10 +30,6 @@ public class AnidbSearchResult extends SearchResult { return name; } - public String getEnglishTitle() { - return aliasNames.length > 0 ? aliasNames[0] : null; - } - @Override public int hashCode() { return aid; diff --git a/source/net/sourceforge/filebot/web/LocalSearch.java b/source/net/sourceforge/filebot/web/LocalSearch.java index 3239b09f..1f9bc121 100644 --- a/source/net/sourceforge/filebot/web/LocalSearch.java +++ b/source/net/sourceforge/filebot/web/LocalSearch.java @@ -1,9 +1,8 @@ - package net.sourceforge.filebot.web; - -import static java.util.Collections.*; -import static net.sourceforge.filebot.similarity.Normalization.*; +import static java.util.Collections.singleton; +import static java.util.Collections.sort; +import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation; import java.util.AbstractList; import java.util.AbstractMap.SimpleEntry; @@ -25,61 +24,58 @@ import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; import com.ibm.icu.text.Transliterator; - public class LocalSearch<T> { - + private final AbstractStringMetric metric = new QGramsDistance(); private float resultMinimumSimilarity = 0.5f; private int resultSetSize = 20; - + private final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove"); - + private final List<T> objects; private final List<Set<String>> fields; - - + public LocalSearch(Collection<? extends T> data) { objects = new ArrayList<T>(data); fields = new ArrayList<Set<String>>(objects.size()); - + for (int i = 0; i < objects.size(); i++) { fields.add(i, getFields(objects.get(i))); } } - - + public List<T> search(String query) throws ExecutionException, InterruptedException { final String q = normalize(query); List<Callable<Entry<T, Float>>> tasks = new ArrayList<Callable<Entry<T, Float>>>(objects.size()); - + for (int i = 0; i < objects.size(); i++) { final int index = i; tasks.add(new Callable<Entry<T, Float>>() { - + @Override public Entry<T, Float> call() throws Exception { float similarity = 0; boolean match = false; - + for (String field : fields.get(index)) { match |= field.contains(q); similarity = Math.max(metric.getSimilarity(q, field), similarity); } - + return match || similarity > resultMinimumSimilarity ? new SimpleEntry<T, Float>(objects.get(index), similarity) : null; } }); } - + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); final List<Entry<T, Float>> resultSet = new ArrayList<Entry<T, Float>>(objects.size()); - + try { for (Future<Entry<T, Float>> entry : executor.invokeAll(tasks)) { if (entry.get() != null) { resultSet.add(entry.get()); } - + if (Thread.interrupted()) { throw new InterruptedException(); } @@ -87,50 +83,45 @@ public class LocalSearch<T> { } finally { executor.shutdownNow(); } - + // sort by similarity descending (best matches first) sort(resultSet, new Comparator<Entry<T, Float>>() { - + @Override public int compare(Entry<T, Float> o1, Entry<T, Float> o2) { return o2.getValue().compareTo(o1.getValue()); } }); - + // view for the first 20 search results return new AbstractList<T>() { - + @Override public T get(int index) { return resultSet.get(index).getKey(); } - - + @Override public int size() { return Math.min(resultSetSize, resultSet.size()); } }; } - - + public void setResultMinimumSimilarity(float resultMinimumSimilarity) { this.resultMinimumSimilarity = resultMinimumSimilarity; } - - + public void setResultSetSize(int resultSetSize) { this.resultSetSize = resultSetSize; } - - + protected Set<String> getFields(T object) { - return set(object.toString()); + return set(singleton(object.toString())); } - - - protected Set<String> set(String... values) { - Set<String> set = new HashSet<String>(values.length); + + protected Set<String> set(Collection<String> values) { + Set<String> set = new HashSet<String>(values.size()); for (String value : values) { if (value != null) { set.add(normalize(value)); @@ -138,11 +129,10 @@ public class LocalSearch<T> { } return set; } - - + protected String normalize(String value) { // normalize separator, normalize case and trim return normalizePunctuation(transliterator.transform(value)).toLowerCase(); } - + } diff --git a/source/net/sourceforge/filebot/web/SearchResult.java b/source/net/sourceforge/filebot/web/SearchResult.java index d97d0340..2ae52ec5 100644 --- a/source/net/sourceforge/filebot/web/SearchResult.java +++ b/source/net/sourceforge/filebot/web/SearchResult.java @@ -1,6 +1,8 @@ package net.sourceforge.filebot.web; import java.io.Serializable; +import java.util.AbstractList; +import java.util.List; public abstract class SearchResult implements Serializable { @@ -24,6 +26,21 @@ public abstract class SearchResult implements Serializable { return aliasNames.clone(); } + public List<String> getNames() { + return new AbstractList<String>() { + + @Override + public String get(int index) { + return index == 0 ? name : aliasNames[index - 1]; + } + + @Override + public int size() { + return 1 + aliasNames.length; + } + }; + } + @Override public String toString() { return name; diff --git a/source/net/sourceforge/filebot/web/SerienjunkiesClient.java b/source/net/sourceforge/filebot/web/SerienjunkiesClient.java index d0eb6c53..30b73256 100644 --- a/source/net/sourceforge/filebot/web/SerienjunkiesClient.java +++ b/source/net/sourceforge/filebot/web/SerienjunkiesClient.java @@ -1,15 +1,15 @@ - package net.sourceforge.filebot.web; - -import static net.sourceforge.filebot.web.EpisodeUtilities.*; -import static net.sourceforge.filebot.web.WebRequest.*; +import static net.sourceforge.filebot.web.EpisodeUtilities.sortEpisodes; +import static net.sourceforge.filebot.web.WebRequest.createIgnoreCertificateSocketFactory; +import static net.sourceforge.filebot.web.WebRequest.getReader; import java.io.IOException; import java.io.Reader; import java.net.URI; import java.net.URL; import java.util.ArrayList; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Set; @@ -24,142 +24,140 @@ import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.JSONValue; - public class SerienjunkiesClient extends AbstractEpisodeListProvider { - + private final String host = "api.serienjunkies.de"; - + private final String apikey; - - + public SerienjunkiesClient(String apikey) { this.apikey = apikey; } - - + @Override public String getName() { return "Serienjunkies"; } - - + @Override public Icon getIcon() { return ResourceManager.getIcon("search.serienjunkies"); } - - + @Override public Locale getDefaultLocale() { return Locale.GERMAN; } - - + @Override public ResultCache getCache() { return new ResultCache(host, Cache.getCache("web-datasource")); } - - + @Override public List<SearchResult> search(String query, final Locale locale) throws Exception { // bypass automatic caching since search is based on locally cached data anyway return fetchSearchResult(query, locale); } - - + @Override public List<SearchResult> fetchSearchResult(String query, Locale locale) throws Exception { - LocalSearch<SerienjunkiesSearchResult> index = new LocalSearch<SerienjunkiesSearchResult>(getSeriesTitles()) { - + LocalSearch<SearchResult> index = new LocalSearch<SearchResult>(getSeriesTitles()) { + @Override - protected Set<String> getFields(SerienjunkiesSearchResult series) { - return set(series.getMainTitle(), series.getGermanTitle()); + protected Set<String> getFields(SearchResult series) { + return set(series.getNames()); } }; - + return new ArrayList<SearchResult>(index.search(query)); } - - + protected synchronized List<SerienjunkiesSearchResult> getSeriesTitles() throws IOException { ResultCache cache = getCache(); - + @SuppressWarnings("unchecked") List<SerienjunkiesSearchResult> seriesList = (List) cache.getSearchResult(null, Locale.ROOT); if (seriesList != null) { return seriesList; } - + // fetch series data seriesList = new ArrayList<SerienjunkiesSearchResult>(); - + JSONObject data = (JSONObject) request("/allseries.php?d=" + apikey); JSONArray list = (JSONArray) data.get("allseries"); - + for (Object element : list) { JSONObject obj = (JSONObject) element; - + Integer sid = new Integer((String) obj.get("id")); String link = (String) obj.get("link"); String mainTitle = (String) obj.get("short"); String germanTitle = (String) obj.get("short_german"); Date startDate = Date.parse((String) obj.get("firstepisode"), "yyyy-MM-dd"); - - seriesList.add(new SerienjunkiesSearchResult(sid, link, mainTitle, germanTitle != null && !germanTitle.isEmpty() ? germanTitle : null, startDate)); + + Set<String> titleSet = new LinkedHashSet<String>(2); + for (String title : new String[] { germanTitle, mainTitle }) { + if (title != null && title.length() > 0) { + titleSet.add(title); + } + } + if (titleSet.size() > 0) { + List<String> titleList = new ArrayList<String>(titleSet); + seriesList.add(new SerienjunkiesSearchResult(sid, link, titleList.get(0), titleList.subList(1, titleList.size()).toArray(new String[0]), startDate)); + } } - + // populate cache return cache.putSearchResult(null, Locale.ROOT, seriesList); } - - + @Override public List<Episode> fetchEpisodeList(SearchResult searchResult, SortOrder sortOrder, Locale locale) throws IOException { SerienjunkiesSearchResult series = (SerienjunkiesSearchResult) searchResult; - + // fetch episode data List<Episode> episodes = new ArrayList<Episode>(25); - - String seriesName = locale.equals(Locale.GERMAN) && series.getGermanTitle() != null ? series.getGermanTitle() : series.getMainTitle(); + + String seriesName = series.getName(); JSONObject data = (JSONObject) request("/allepisodes.php?d=" + apikey + "&q=" + series.getSeriesId()); JSONArray list = (JSONArray) data.get("allepisodes"); - + for (int i = 0; i < list.size(); i++) { JSONObject obj = (JSONObject) list.get(i); - + Integer season = new Integer((String) obj.get("season")); Integer episode = new Integer((String) obj.get("episode")); Date airdate = Date.parse((String) ((JSONObject) obj.get("airdates")).get("premiere"), "yyyy-MM-dd"); - + String title = (String) obj.get("original"); String german = (String) obj.get("german"); if (title == null || (Locale.GERMAN.equals(locale) && german != null)) { title = german; } - + // enforce sanity if (title == null) { title = ""; } - + episodes.add(new Episode(seriesName, series.getStartDate(), season, episode, title, i + 1, null, airdate, searchResult)); } - + // make sure episodes are in ordered correctly sortEpisodes(episodes); - + return episodes; } - - + protected Object request(String resource) throws IOException { URL url = new URL("https", host, resource); HttpsURLConnection connection = (HttpsURLConnection) url.openConnection(); - + // disable SSL certificate validation connection.setSSLSocketFactory(createIgnoreCertificateSocketFactory()); - + // fetch and parse JSON data Reader reader = getReader(connection); try { @@ -168,11 +166,10 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider { reader.close(); } } - - + @Override public URI getEpisodeListLink(SearchResult searchResult) { return URI.create(String.format("http://www.serienjunkies.de/%s/alle-serien-staffeln.html", ((SerienjunkiesSearchResult) searchResult).getLink())); } - + } diff --git a/source/net/sourceforge/filebot/web/SerienjunkiesSearchResult.java b/source/net/sourceforge/filebot/web/SerienjunkiesSearchResult.java index cc5a4904..bae060d0 100644 --- a/source/net/sourceforge/filebot/web/SerienjunkiesSearchResult.java +++ b/source/net/sourceforge/filebot/web/SerienjunkiesSearchResult.java @@ -1,79 +1,50 @@ - package net.sourceforge.filebot.web; - public class SerienjunkiesSearchResult extends SearchResult { - + protected int sid; protected String link; - protected String mainTitle; - protected String germanTitle; protected Date startDate; - - + protected SerienjunkiesSearchResult() { // used by serializer } - - - public SerienjunkiesSearchResult(int sid, String link, String mainTitle, String germanTitle, Date startDate) { + + public SerienjunkiesSearchResult(int sid, String link, String germanTitle, String[] otherTitles, Date startDate) { + super(germanTitle, otherTitles); this.sid = sid; this.link = link; - this.mainTitle = mainTitle; - this.germanTitle = germanTitle; this.startDate = startDate; } - - + public int getId() { return sid; } - - - @Override - public String getName() { - return germanTitle != null ? germanTitle : mainTitle; // prefer German title - } - - + public int getSeriesId() { return sid; } - - + public String getLink() { return link; } - - - public String getMainTitle() { - return mainTitle; - } - - - public String getGermanTitle() { - return germanTitle; - } - - + public Date getStartDate() { return startDate; } - - + @Override public int hashCode() { return sid; } - - + @Override public boolean equals(Object object) { if (object instanceof SerienjunkiesSearchResult) { SerienjunkiesSearchResult other = (SerienjunkiesSearchResult) object; return this.sid == other.sid; } - + return false; } } \ No newline at end of file diff --git a/source/net/sourceforge/tuned/FileUtilities.java b/source/net/sourceforge/tuned/FileUtilities.java index 3baed578..9cc8b650 100644 --- a/source/net/sourceforge/tuned/FileUtilities.java +++ b/source/net/sourceforge/tuned/FileUtilities.java @@ -21,6 +21,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Scanner; import java.util.SortedMap; import java.util.TreeMap; import java.util.regex.Matcher; @@ -160,6 +161,18 @@ public final class FileUtilities { } } + public static List<String[]> readCSV(InputStream source, String charsetName, String separatorPattern) { + Scanner scanner = new Scanner(source, charsetName); + Pattern separator = Pattern.compile(separatorPattern); + List<String[]> rows = new ArrayList<String[]>(65536); + + while (scanner.hasNextLine()) { + rows.add(separator.split(scanner.nextLine())); + } + + return rows; + } + public static Reader createTextReader(File file) throws IOException { CharsetDetector detector = new CharsetDetector(); detector.setDeclaredEncoding("UTF-8"); // small boost for UTF-8 as default encoding