1
0
mirror of https://github.com/mitb-archive/filebot synced 2024-08-13 17:03:45 -04:00

* extract local search into its own class

* AniDB: search by any language
* SJ: use german series name if language is set to GERMAN
This commit is contained in:
Reinhard Pointner 2011-11-04 05:23:23 +00:00
parent 15b90ebf73
commit 5184e4d98d
5 changed files with 200 additions and 130 deletions

View File

@ -23,17 +23,22 @@ public abstract class AbstractEpisodeListProvider implements EpisodeListProvider
public List<SearchResult> search(String query) throws Exception {
return search(query, Locale.ENGLISH);
return search(query, getDefaultLocale());
}
public List<Episode> getEpisodeList(SearchResult searchResult) throws Exception {
return getEpisodeList(searchResult, Locale.ENGLISH);
return getEpisodeList(searchResult, getDefaultLocale());
}
public List<Episode> getEpisodeList(SearchResult searchResult, int season) throws Exception {
return getEpisodeList(searchResult, season, Locale.ENGLISH);
return getEpisodeList(searchResult, season, getDefaultLocale());
}
/**
 * Returns the locale that is passed on by the overloads of this class that do not take an explicit
 * {@link Locale} argument.
 *
 * @return {@link Locale#ENGLISH} in this implementation
 */
public Locale getDefaultLocale() {
return Locale.ENGLISH;
}

View File

@ -10,19 +10,15 @@ import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.AbstractMap.SimpleEntry;
import java.util.Set;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -33,9 +29,6 @@ import javax.swing.Icon;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
@ -82,54 +75,16 @@ public class AnidbClient extends AbstractEpisodeListProvider {
@Override
public List<SearchResult> search(String query, Locale locale) throws Exception {
// normalize
query = query.toLowerCase();
AbstractStringMetric metric = new QGramsDistance();
final List<Entry<SearchResult, Float>> resultSet = new ArrayList<Entry<SearchResult, Float>>();
for (AnidbSearchResult anime : getAnimeTitles()) {
for (String name : new String[] { anime.getMainTitle(), anime.getEnglishTitle() }) {
if (name != null) {
// normalize
name = name.toLowerCase();
float similarity = metric.getSimilarity(name, query);
if (similarity > 0.5 || name.contains(query)) {
resultSet.add(new SimpleEntry<SearchResult, Float>(anime, similarity));
// add only once
break;
}
}
}
}
// sort by similarity descending (best matches first)
Collections.sort(resultSet, new Comparator<Entry<SearchResult, Float>>() {
public List<SearchResult> search(String query, final Locale locale) throws Exception {
LocalSearch<AnidbSearchResult> index = new LocalSearch<AnidbSearchResult>(getAnimeTitles()) {
@Override
public int compare(Entry<SearchResult, Float> o1, Entry<SearchResult, Float> o2) {
return o2.getValue().compareTo(o1.getValue());
}
});
// view for the first 20 search results
return new AbstractList<SearchResult>() {
@Override
public SearchResult get(int index) {
return resultSet.get(index).getKey();
}
@Override
public int size() {
return Math.min(20, resultSet.size());
protected Set<String> getFields(AnidbSearchResult anime) {
return set(anime.getPrimaryTitle(), anime.getOfficialTitle("en"), anime.getOfficialTitle(locale.getLanguage()));
}
};
return new ArrayList<SearchResult>(index.search(query));
}
@ -225,8 +180,8 @@ public class AnidbClient extends AbstractEpisodeListProvider {
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
Map<Integer, String> primaryTitleMap = new TreeMap<Integer, String>();
Map<Integer, String> englishTitleMap = new HashMap<Integer, String>();
Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>();
Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>();
// fetch data
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
@ -236,10 +191,21 @@ public class AnidbClient extends AbstractEpisodeListProvider {
Matcher matcher = pattern.matcher(scanner.nextLine());
if (matcher.matches()) {
if (matcher.group(2).equals("1")) {
primaryTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4));
} else if (matcher.group(2).equals("4") && matcher.group(3).equals("en")) {
englishTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4));
int aid = Integer.parseInt(matcher.group(1));
String type = matcher.group(2);
String language = matcher.group(3);
String title = matcher.group(4);
if (type.equals("1")) {
primaryTitleMap.put(aid, title);
} else if (type.equals("4")) {
Map<String, String> languageTitleMap = officialTitleMap.get(aid);
if (languageTitleMap == null) {
languageTitleMap = new HashMap<String, String>();
officialTitleMap.put(aid, languageTitleMap);
}
languageTitleMap.put(language, title);
}
}
}
@ -247,11 +213,11 @@ public class AnidbClient extends AbstractEpisodeListProvider {
scanner.close();
}
// build up a list of all possible anidb search results
// build up a list of all possible AniDB search results
anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size());
for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) {
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitleMap.get(entry.getKey())));
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), officialTitleMap.get(entry.getKey())));
}
// populate cache
@ -264,8 +230,8 @@ public class AnidbClient extends AbstractEpisodeListProvider {
public static class AnidbSearchResult extends SearchResult implements Serializable {
protected int aid;
protected String mainTitle;
protected String englishTitle;
protected String primaryTitle; // one per anime
protected Map<String, String> officialTitle; // one per language
protected AnidbSearchResult() {
@ -273,10 +239,10 @@ public class AnidbClient extends AbstractEpisodeListProvider {
}
public AnidbSearchResult(int aid, String mainTitle, String englishTitle) {
public AnidbSearchResult(int aid, String primaryTitle, Map<String, String> officialTitle) {
this.aid = aid;
this.mainTitle = mainTitle;
this.englishTitle = englishTitle;
this.primaryTitle = primaryTitle;
this.officialTitle = officialTitle;
}
@ -287,17 +253,17 @@ public class AnidbClient extends AbstractEpisodeListProvider {
@Override
public String getName() {
return mainTitle;
return primaryTitle;
}
public String getMainTitle() {
return mainTitle;
public String getPrimaryTitle() {
return primaryTitle;
}
public String getEnglishTitle() {
return englishTitle;
/**
 * Looks up the official title stored under the given key.
 *
 * @param key key into the official title map (the map holds one title per language)
 * @return the title stored for that key, or {@code null} if this result has no official title map
 *         or the map has no entry for the key
 */
public String getOfficialTitle(String key) {
return officialTitle != null ? officialTitle.get(key) : null;
}
}

View File

@ -0,0 +1,129 @@
package net.sourceforge.filebot.web;
import static java.util.Collections.*;

import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;

import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
/**
 * In-memory fuzzy search over a fixed collection of objects.
 *
 * <p>Each object is represented by a set of normalized text fields (see {@link #getFields(Object)}).
 * A query matches an object if one of its fields contains the normalized query, or if the best
 * similarity between the query and any field exceeds the minimum similarity. Matches are ordered by
 * similarity, best first, and only the first few are exposed.
 *
 * @param <T> type of the searchable objects
 */
class LocalSearch<T> {

    /**
     * Any run of punctuation and/or whitespace. Compiled once, because {@link #normalize(String)}
     * runs for every field of every indexed object and for every query.
     */
    private static final Pattern SEPARATOR = Pattern.compile("[\\p{Punct}\\p{Space}]+");

    // NOTE(review): this single instance is used concurrently by all scoring tasks; this assumes
    // QGramsDistance.getSimilarity is safe for concurrent use -- confirm against the library.
    private final AbstractStringMetric metric = new QGramsDistance();

    /** A non-substring match is only reported if its similarity is strictly greater than this. */
    private final float resultMinimumSimilarity = 0.5f;

    /** Maximum number of results exposed by {@link #search(String)}. */
    private final int resultSetSize = 20;

    private final List<T> objects;

    /** Normalized search fields; element {@code i} belongs to {@code objects.get(i)}. */
    private final List<Set<String>> fields;


    /**
     * Creates an index over a snapshot of the given data.
     *
     * <p>{@link #getFields(Object)} is called from this constructor for every object, so an
     * overriding implementation must not depend on instance fields declared by the subclass.
     *
     * @param data objects to index; copied, so later changes to the collection are not reflected
     */
    public LocalSearch(Collection<? extends T> data) {
        objects = new ArrayList<T>(data);
        fields = new ArrayList<Set<String>>(objects.size());

        for (T object : objects) {
            fields.add(getFields(object));
        }
    }


    /**
     * Finds the objects that best match the given query.
     *
     * @param query search text; must not be {@code null}
     * @return unmodifiable list of at most {@code resultSetSize} matches, best match first; empty
     *         if the query contains nothing but separators
     * @throws ExecutionException if scoring one of the objects fails
     * @throws InterruptedException if the calling thread is interrupted while waiting for results
     */
    public List<T> search(String query) throws ExecutionException, InterruptedException {
        final String q = normalize(query);

        // every string contains the empty string, so a blank query would otherwise match every
        // single object and return an arbitrary selection of them
        if (q.isEmpty()) {
            return emptyList();
        }

        // one scoring task per indexed object
        List<Callable<Entry<T, Float>>> tasks = new ArrayList<Callable<Entry<T, Float>>>(objects.size());

        for (int i = 0; i < objects.size(); i++) {
            final int index = i;

            tasks.add(new Callable<Entry<T, Float>>() {

                @Override
                public Entry<T, Float> call() throws Exception {
                    return score(index, q);
                }
            });
        }

        ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        final List<Entry<T, Float>> resultSet = new ArrayList<Entry<T, Float>>();

        try {
            // invokeAll returns the futures in task order, once all tasks have completed
            for (Future<Entry<T, Float>> future : executor.invokeAll(tasks)) {
                Entry<T, Float> hit = future.get();

                if (hit != null) {
                    resultSet.add(hit);
                }
            }
        } finally {
            executor.shutdown();
        }

        // sort by similarity descending (best matches first)
        sort(resultSet, new Comparator<Entry<T, Float>>() {

            @Override
            public int compare(Entry<T, Float> o1, Entry<T, Float> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        // read-only view of the best matches
        return new AbstractList<T>() {

            @Override
            public T get(int index) {
                return resultSet.get(index).getKey();
            }


            @Override
            public int size() {
                return Math.min(resultSetSize, resultSet.size());
            }
        };
    }


    /**
     * Scores a single indexed object against an already normalized query.
     *
     * @return the object paired with its best field similarity, or {@code null} if it is no match
     */
    private Entry<T, Float> score(int index, String q) {
        float similarity = 0;
        boolean match = false;

        for (String field : fields.get(index)) {
            match |= field.contains(q);
            similarity = Math.max(metric.getSimilarity(q, field), similarity);
        }

        if (match || similarity > resultMinimumSimilarity) {
            return new SimpleEntry<T, Float>(objects.get(index), similarity);
        }

        return null;
    }


    /**
     * Returns the normalized text fields by which the given object can be found. The default
     * implementation uses {@code object.toString()}; subclasses override this to index other
     * properties, typically via {@link #set(String...)}.
     *
     * <p>Called from the constructor, see {@link #LocalSearch(Collection)}.
     */
    protected Set<String> getFields(T object) {
        return set(object.toString());
    }


    /**
     * Builds a set of normalized fields from the given values.
     *
     * @param values raw field values; {@code null} elements are skipped
     * @return the distinct normalized values
     */
    protected Set<String> set(String... values) {
        Set<String> set = new HashSet<String>(values.length);

        for (String value : values) {
            if (value != null) {
                set.add(normalize(value));
            }
        }

        return set;
    }


    /**
     * Normalizes a field value or query: every run of punctuation and whitespace becomes a single
     * space, leading and trailing whitespace is trimmed, and the result is lower-cased.
     *
     * @param value text to normalize; must not be {@code null}
     */
    protected String normalize(String value) {
        // Locale.ROOT keeps the result independent of the default locale of the JVM
        return SEPARATOR.matcher(value).replaceAll(" ").trim().toLowerCase(Locale.ROOT);
    }

}

View File

@ -10,16 +10,12 @@ import java.io.Reader;
import java.io.Serializable;
import java.net.URI;
import java.net.URL;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;
import java.util.Set;
import javax.net.ssl.HttpsURLConnection;
import javax.swing.Icon;
@ -28,9 +24,6 @@ import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
@ -63,55 +56,22 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
@Override
public List<SearchResult> search(String query, Locale locale) throws IOException {
// normalize
query = query.toLowerCase();
AbstractStringMetric metric = new QGramsDistance();
final List<Entry<SearchResult, Float>> resultSet = new ArrayList<Entry<SearchResult, Float>>();
for (SerienjunkiesSearchResult anime : getSeriesTitles()) {
for (String name : new String[] { anime.getMainTitle(), anime.getGermanTitle() }) {
if (name != null) {
// normalize
name = name.toLowerCase();
float similarity = metric.getSimilarity(name, query);
if (similarity > 0.5 || name.contains(query)) {
resultSet.add(new SimpleEntry<SearchResult, Float>(anime, similarity));
// add only once
break;
}
}
}
}
// sort by similarity descending (best matches first)
Collections.sort(resultSet, new Comparator<Entry<SearchResult, Float>>() {
@Override
public int compare(Entry<SearchResult, Float> o1, Entry<SearchResult, Float> o2) {
return o2.getValue().compareTo(o1.getValue());
}
});
// view for the first 20 search results
return new AbstractList<SearchResult>() {
@Override
public SearchResult get(int index) {
return resultSet.get(index).getKey();
public Locale getDefaultLocale() {
return Locale.GERMAN;
}
@Override
public int size() {
return Math.min(20, resultSet.size());
public List<SearchResult> search(String query, Locale locale) throws Exception {
LocalSearch<SerienjunkiesSearchResult> index = new LocalSearch<SerienjunkiesSearchResult>(getSeriesTitles()) {
@Override
protected Set<String> getFields(SerienjunkiesSearchResult series) {
return set(series.getMainTitle(), series.getGermanTitle());
}
};
return new ArrayList<SearchResult>(index.search(query));
}
@ -158,6 +118,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
// fetch episode data
episodes = new ArrayList<Episode>(25);
String seriesName = locale.equals(Locale.GERMAN) && series.getGermanTitle() != null ? series.getGermanTitle() : series.getMainTitle();
JSONObject data = (JSONObject) request("/allepisodes.php?d=" + apikey + "&q=" + series.getSeriesId());
JSONArray list = (JSONArray) data.get("allepisodes");
@ -169,7 +130,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
String title = (String) obj.get("german");
Date airdate = Date.parse((String) ((JSONObject) obj.get("airdates")).get("premiere"), "yyyy-MM-dd");
episodes.add(new Episode(series.getName(), series.getStartDate(), season, episode, title, i + 1, null, airdate));
episodes.add(new Episode(seriesName, series.getStartDate(), season, episode, title, i + 1, null, airdate));
}
// populate cache
@ -241,7 +202,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
@Override
public String getName() {
return germanTitle != null ? germanTitle : mainTitle; // prefer german title
return germanTitle != null ? germanTitle : mainTitle; // prefer German title
}

View File

@ -36,7 +36,7 @@ public class AnidbClientTest {
@BeforeClass
public static void setUpBeforeClass() throws Exception {
monsterSearchResult = new AnidbSearchResult(1539, "Monster", null);
twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", "The Twelve Kingdoms");
twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", null);
princessTutuSearchResult = new AnidbSearchResult(516, "Princess Tutu", null);
}
@ -49,7 +49,16 @@ public class AnidbClientTest {
List<SearchResult> results = anidb.search("one piece");
AnidbSearchResult result = (AnidbSearchResult) results.get(0);
assertEquals("One Piece", result.getName());
assertEquals(69, result.getAnimeId());
}
@Test
public void searchJapanese() throws Exception {
List<SearchResult> results = anidb.search("ワンピース", Locale.JAPANESE);
AnidbSearchResult result = (AnidbSearchResult) results.get(0);
assertEquals("One Piece", result.getName());
assertEquals(69, result.getAnimeId());
}