2007-12-23 14:28:04 -05:00
|
|
|
|
|
|
|
package net.sourceforge.filebot.web;
|
|
|
|
|
|
|
|
|
2009-05-17 13:22:44 -04:00
|
|
|
import static net.sourceforge.filebot.web.WebRequest.*;
|
|
|
|
import static net.sourceforge.tuned.XPathUtilities.*;
|
2009-01-04 13:28:28 -05:00
|
|
|
|
2007-12-23 14:28:04 -05:00
|
|
|
import java.io.IOException;
|
2009-10-28 11:09:47 -04:00
|
|
|
import java.io.Serializable;
|
2007-12-23 14:28:04 -05:00
|
|
|
import java.net.MalformedURLException;
|
2008-06-21 15:24:18 -04:00
|
|
|
import java.net.URI;
|
2009-10-28 11:09:47 -04:00
|
|
|
import java.net.URISyntaxException;
|
2007-12-23 14:28:04 -05:00
|
|
|
import java.net.URL;
|
2009-12-03 19:24:35 -05:00
|
|
|
import java.net.URLConnection;
|
2009-10-28 11:09:47 -04:00
|
|
|
import java.util.AbstractList;
|
2007-12-23 14:28:04 -05:00
|
|
|
import java.util.ArrayList;
|
2009-10-28 11:09:47 -04:00
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.Comparator;
|
|
|
|
import java.util.HashMap;
|
2007-12-23 14:28:04 -05:00
|
|
|
import java.util.List;
|
2009-10-28 11:09:47 -04:00
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Scanner;
|
|
|
|
import java.util.TreeMap;
|
|
|
|
import java.util.AbstractMap.SimpleEntry;
|
|
|
|
import java.util.Map.Entry;
|
2008-02-09 12:53:08 -05:00
|
|
|
import java.util.logging.Logger;
|
2009-07-13 08:40:27 -04:00
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
2009-10-28 11:09:47 -04:00
|
|
|
import java.util.zip.GZIPInputStream;
|
2007-12-23 14:28:04 -05:00
|
|
|
|
2008-07-13 13:59:05 -04:00
|
|
|
import javax.swing.Icon;
|
|
|
|
|
2007-12-23 14:28:04 -05:00
|
|
|
import org.w3c.dom.Document;
|
|
|
|
import org.w3c.dom.Node;
|
|
|
|
import org.xml.sax.SAXException;
|
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
|
|
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
|
|
|
|
|
|
|
import net.sf.ehcache.Cache;
|
|
|
|
import net.sf.ehcache.CacheManager;
|
|
|
|
import net.sf.ehcache.Element;
|
2009-06-27 08:03:48 -04:00
|
|
|
import net.sourceforge.filebot.ResourceManager;
|
|
|
|
|
2007-12-23 14:28:04 -05:00
|
|
|
|
2009-03-18 16:09:45 -04:00
|
|
|
public class AnidbClient implements EpisodeListProvider {
|
2007-12-23 14:28:04 -05:00
|
|
|
|
2008-07-30 18:37:01 -04:00
|
|
|
// Host name of the AniDB web site; every URL and URI built in this class targets it.
private static final String host = "anidb.net";
|
2007-12-23 14:28:04 -05:00
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
// Ehcache cache named "anidb", looked up on the CacheManager singleton; this class stores
// the anime title index and fetched episode lists in it, keyed by the request URL string.
// NOTE(review): presumably getCache returns null when no cache with this name is configured — confirm the ehcache configuration declares it.
private static final Cache cache = CacheManager.getInstance().getCache("anidb");
|
|
|
|
|
2009-06-27 08:03:48 -04:00
|
|
|
|
2008-07-13 13:59:05 -04:00
|
|
|
@Override
|
|
|
|
public String getName() {
|
|
|
|
return "AniDB";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Icon getIcon() {
|
|
|
|
return ResourceManager.getIcon("search.anidb");
|
|
|
|
}
|
2007-12-23 14:28:04 -05:00
|
|
|
|
|
|
|
|
|
|
|
@Override
|
2009-01-04 13:28:28 -05:00
|
|
|
public List<SearchResult> search(String query) throws IOException, SAXException {
|
2009-10-28 11:09:47 -04:00
|
|
|
// normalize
|
|
|
|
query = query.toLowerCase();
|
2007-12-23 14:28:04 -05:00
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
AbstractStringMetric metric = new QGramsDistance();
|
2008-03-29 08:20:01 -04:00
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
final List<Entry<SearchResult, Float>> resultSet = new ArrayList<Entry<SearchResult, Float>>();
|
2007-12-23 14:28:04 -05:00
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
for (AnidbSearchResult anime : getAnimeTitles()) {
|
|
|
|
for (String name : new String[] { anime.getMainTitle(), anime.getEnglishTitle() }) {
|
|
|
|
if (name != null) {
|
2009-11-02 18:25:04 -05:00
|
|
|
// normalize
|
|
|
|
name = name.toLowerCase();
|
2009-10-28 11:09:47 -04:00
|
|
|
|
2009-11-02 18:25:04 -05:00
|
|
|
float similarity = metric.getSimilarity(name, query);
|
|
|
|
|
|
|
|
if (similarity > 0.5 || name.contains(query)) {
|
2009-10-28 11:09:47 -04:00
|
|
|
resultSet.add(new SimpleEntry<SearchResult, Float>(anime, similarity));
|
|
|
|
|
|
|
|
// add only once
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2008-07-07 19:38:17 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
// sort by similarity descending (best matches first)
|
|
|
|
Collections.sort(resultSet, new Comparator<Entry<SearchResult, Float>>() {
|
2009-05-25 16:13:30 -04:00
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
@Override
|
|
|
|
public int compare(Entry<SearchResult, Float> o1, Entry<SearchResult, Float> o2) {
|
|
|
|
return o2.getValue().compareTo(o1.getValue());
|
2007-12-23 14:28:04 -05:00
|
|
|
}
|
2009-10-28 11:09:47 -04:00
|
|
|
});
|
2007-12-23 14:28:04 -05:00
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
// view for the first 20 search results
|
|
|
|
return new AbstractList<SearchResult>() {
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public SearchResult get(int index) {
|
|
|
|
return resultSet.get(index).getKey();
|
|
|
|
}
|
|
|
|
|
2009-05-25 16:13:30 -04:00
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
@Override
|
|
|
|
public int size() {
|
|
|
|
return Math.min(20, resultSet.size());
|
|
|
|
}
|
|
|
|
};
|
2009-05-25 16:13:30 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-07-13 08:40:27 -04:00
|
|
|
@Override
|
|
|
|
public List<Episode> getEpisodeList(SearchResult searchResult) throws IOException, SAXException {
|
2009-10-28 11:09:47 -04:00
|
|
|
int aid = ((AnidbSearchResult) searchResult).getAnimeId();
|
|
|
|
URL url = new URL("http", host, "/perl-bin/animedb.pl?show=xml&t=anime&aid=" + aid);
|
|
|
|
|
|
|
|
// try cache first
|
|
|
|
try {
|
|
|
|
return Arrays.asList((Episode[]) cache.get(url.toString()).getValue());
|
|
|
|
} catch (Exception e) {
|
|
|
|
// ignore
|
|
|
|
}
|
2009-07-13 08:40:27 -04:00
|
|
|
|
2009-12-03 19:24:35 -05:00
|
|
|
// set request headers to resemble an ajax request
|
|
|
|
URLConnection connection = url.openConnection();
|
|
|
|
connection.setRequestProperty("X-LControl", "x-no-cache");
|
|
|
|
|
2009-07-13 08:40:27 -04:00
|
|
|
// get anime page as xml
|
2009-12-03 19:24:35 -05:00
|
|
|
Document dom = getDocument(connection);
|
2009-07-13 08:40:27 -04:00
|
|
|
|
|
|
|
// select main title
|
2009-10-28 11:09:47 -04:00
|
|
|
String animeTitle = selectString("//title[@type='main']", dom);
|
2009-07-13 08:40:27 -04:00
|
|
|
|
|
|
|
List<Episode> episodes = new ArrayList<Episode>(25);
|
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
for (Node node : selectNodes("//ep", dom)) {
|
2010-10-24 08:10:30 -04:00
|
|
|
Integer number = getIntegerContent("epno", node);
|
2009-07-13 08:40:27 -04:00
|
|
|
|
2009-11-21 14:21:46 -05:00
|
|
|
// ignore special episodes
|
2010-10-24 08:10:30 -04:00
|
|
|
if (number != null) {
|
2009-10-28 11:09:47 -04:00
|
|
|
String title = selectString(".//title[@lang='en']", node);
|
2010-10-23 08:47:43 -04:00
|
|
|
String airdate = selectString(".//date/@rel", node);
|
2009-07-13 08:40:27 -04:00
|
|
|
|
|
|
|
// no seasons for anime
|
2010-11-09 03:04:12 -05:00
|
|
|
episodes.add(new Episode(animeTitle, null, number, title, number, null, Date.parse(airdate, "yyyy-MM-dd")));
|
2009-07-13 08:40:27 -04:00
|
|
|
}
|
|
|
|
}
|
2009-05-25 16:13:30 -04:00
|
|
|
|
2009-07-13 08:40:27 -04:00
|
|
|
// sanity check
|
2009-10-28 11:09:47 -04:00
|
|
|
if (episodes.size() > 0) {
|
|
|
|
// populate cache
|
|
|
|
cache.put(new Element(url.toString(), episodes.toArray(new Episode[0])));
|
|
|
|
} else {
|
2009-07-13 08:40:27 -04:00
|
|
|
// anime page xml doesn't work sometimes
|
|
|
|
Logger.getLogger(getClass().getName()).warning(String.format("Failed to parse episode data from xml: %s (%d)", searchResult, aid));
|
|
|
|
}
|
2009-05-25 16:13:30 -04:00
|
|
|
|
2009-07-13 08:40:27 -04:00
|
|
|
return episodes;
|
2007-12-23 14:28:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-05 23:17:23 -04:00
|
|
|
@Override
|
|
|
|
public URI getEpisodeListLink(SearchResult searchResult) {
|
2009-10-28 11:09:47 -04:00
|
|
|
int aid = ((AnidbSearchResult) searchResult).getAnimeId();
|
|
|
|
|
|
|
|
try {
|
|
|
|
return new URI("http", host, "/a" + aid, null);
|
|
|
|
} catch (URISyntaxException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
2008-07-05 23:17:23 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean hasSingleSeasonSupport() {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
2009-02-09 15:56:20 -05:00
|
|
|
public List<Episode> getEpisodeList(SearchResult searchResult, int season) throws Exception {
|
2008-07-05 23:17:23 -04:00
|
|
|
throw new UnsupportedOperationException();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-03-29 08:20:01 -04:00
|
|
|
@Override
|
2008-06-21 15:24:18 -04:00
|
|
|
public URI getEpisodeListLink(SearchResult searchResult, int season) {
|
2009-03-17 17:59:19 -04:00
|
|
|
return null;
|
2007-12-23 14:28:04 -05:00
|
|
|
}
|
|
|
|
|
2009-10-28 11:09:47 -04:00
|
|
|
|
|
|
|
private AnidbSearchResult[] getAnimeTitles() throws MalformedURLException, IOException, SAXException {
|
|
|
|
URL url = new URL("http", host, "/api/animetitles.dat.gz");
|
|
|
|
|
|
|
|
// try cache first
|
|
|
|
try {
|
|
|
|
return (AnidbSearchResult[]) cache.get(url.toString()).getValue();
|
|
|
|
} catch (Exception e) {
|
|
|
|
// ignore
|
|
|
|
}
|
|
|
|
|
|
|
|
// <aid>|<type>|<language>|<title>
|
|
|
|
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
|
|
|
|
Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
|
|
|
|
|
|
|
|
Map<Integer, String> primaryTitleMap = new TreeMap<Integer, String>();
|
|
|
|
Map<Integer, String> englishTitleMap = new HashMap<Integer, String>();
|
|
|
|
|
|
|
|
// fetch data
|
|
|
|
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
|
|
|
|
|
|
|
|
try {
|
|
|
|
while (scanner.hasNextLine()) {
|
|
|
|
Matcher matcher = pattern.matcher(scanner.nextLine());
|
|
|
|
|
|
|
|
if (matcher.matches()) {
|
|
|
|
if (matcher.group(2).equals("1")) {
|
|
|
|
primaryTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4));
|
|
|
|
} else if (matcher.group(2).equals("4") && matcher.group(3).equals("en")) {
|
|
|
|
englishTitleMap.put(Integer.parseInt(matcher.group(1)), matcher.group(4));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
scanner.close();
|
|
|
|
}
|
|
|
|
|
|
|
|
List<AnidbSearchResult> anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size());
|
|
|
|
|
|
|
|
for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) {
|
|
|
|
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitleMap.get(entry.getKey())));
|
|
|
|
}
|
|
|
|
|
|
|
|
// populate cache
|
|
|
|
AnidbSearchResult[] result = anime.toArray(new AnidbSearchResult[0]);
|
|
|
|
cache.put(new Element(url.toString(), result));
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public static class AnidbSearchResult extends SearchResult implements Serializable {
|
|
|
|
|
|
|
|
protected int aid;
|
|
|
|
protected String mainTitle;
|
|
|
|
protected String englishTitle;
|
|
|
|
|
|
|
|
|
|
|
|
protected AnidbSearchResult() {
|
|
|
|
// used by serializer
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public AnidbSearchResult(int aid, String mainTitle, String englishTitle) {
|
|
|
|
this.aid = aid;
|
|
|
|
this.mainTitle = mainTitle;
|
|
|
|
this.englishTitle = englishTitle;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public int getAnimeId() {
|
|
|
|
return aid;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getName() {
|
|
|
|
return mainTitle;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public String getMainTitle() {
|
|
|
|
return mainTitle;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public String getEnglishTitle() {
|
|
|
|
return englishTitle;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-12-23 14:28:04 -05:00
|
|
|
}
|