// filebot/source/net/filebot/web/AnidbClient.java
// (listing metadata: 292 lines, 10 KiB, Java)

package net.filebot.web;
import static java.nio.charset.StandardCharsets.*;
import static java.util.Collections.*;
import static java.util.stream.Collectors.*;
import static net.filebot.CachedResource.*;
import static net.filebot.Logging.*;
import static net.filebot.util.StringUtilities.*;
import static net.filebot.util.XPathUtilities.*;
import static net.filebot.web.EpisodeUtilities.*;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URL;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.swing.Icon;
import org.jsoup.Jsoup;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import net.filebot.Cache;
import net.filebot.CacheType;
import net.filebot.Resource;
import net.filebot.ResourceManager;
public class AnidbClient extends AbstractEpisodeListProvider implements ArtworkProvider {
// shared by all instances: no more than 1 request within a 5 second window
private static final FloodLimit REQUEST_LIMIT = new FloodLimit(1, 5, TimeUnit.SECONDS); // no more than 1 requests within a 5 second window
// value sent as the "client" parameter of AniDB HTTP API requests (see getResource)
private final String client;
// value sent as the "clientver" parameter of AniDB HTTP API requests (see getResource)
private final int clientver;
/**
 * Create a client that identifies itself to the AniDB HTTP API with the given credentials.
 *
 * @param client
 *            client name sent with each API request
 * @param clientver
 *            client version sent with each API request
 */
public AnidbClient(String client, int clientver) {
    this.clientver = clientver;
    this.client = client;
}
/**
 * Identifier of this data source.
 *
 * @return the constant string {@code "AniDB"}
 */
@Override
public String getIdentifier() {
    final String identifier = "AniDB";
    return identifier;
}
/**
 * Icon representing this data source.
 *
 * @return the icon registered in {@link ResourceManager} under the key {@code "search.anidb"}
 */
@Override
public Icon getIcon() {
    Icon icon = ResourceManager.getIcon("search.anidb");
    return icon;
}
/**
 * This provider does not report season numbers.
 *
 * @return always {@code false}
 */
@Override
public boolean hasSeasonSupport() {
    final boolean seasonSupport = false;
    return seasonSupport;
}
/**
 * Restrict the requested sort order to the ones this provider accepts.
 *
 * @param order
 *            the requested sort order
 * @return {@code order} itself if it is {@code Absolute} or {@code AbsoluteAirdate}, {@code Absolute} otherwise
 */
@Override
public SortOrder vetoRequestParameter(SortOrder order) {
    // only the two absolute orderings are passed through unchanged
    if (order == SortOrder.Absolute || order == SortOrder.AbsoluteAirdate) {
        return order;
    }
    return SortOrder.Absolute;
}
/**
 * Look up the weekly cache for the given section of this provider.
 *
 * @param section
 *            section name, appended to the provider name with an underscore to form the cache name
 * @return the cache named {@code getName() + "_" + section} of type {@link CacheType#Weekly}
 */
@Override
protected Cache getCache(String section) {
    String cacheName = getName() + "_" + section;
    return Cache.getCache(cacheName, CacheType.Weekly);
}
/**
 * Search for anime by name.
 *
 * Delegates straight to {@link #fetchSearchResult(String, Locale)} instead of going through automatic caching, because the search runs against locally cached data anyway.
 *
 * @param query
 *            search query
 * @param locale
 *            passed through to {@link #fetchSearchResult(String, Locale)}
 * @return matching search results
 */
@Override
public List<SearchResult> search(String query, Locale locale) throws Exception {
    List<SearchResult> results = fetchSearchResult(query, locale);
    return results;
}
// local AniDB search index, built from getAnimeTitles() and keyed on the effective names of each search result
// NOTE(review): Resource.lazy presumably defers building the index until the first get() call — confirm against Resource
private final Resource<LocalSearch<SearchResult>> localIndex = Resource.lazy(() -> new LocalSearch<SearchResult>(getAnimeTitles(), SearchResult::getEffectiveNames));
/**
 * Run the query against the local AniDB title index.
 *
 * @param query
 *            search query
 * @param locale
 *            not used by this implementation
 * @return search results found by the local index
 */
@Override
public List<SearchResult> fetchSearchResult(String query, Locale locale) throws Exception {
    LocalSearch<SearchResult> index = localIndex.get();
    return index.search(query);
}
/**
 * Fetch series information and the episode list for the given anime from its AniDB XML document.
 *
 * Anime of type "Music Video" or "unknown" yield series info with an empty episode list. Only episodes of type 1 (normal episodes) and type 2 (specials) are included; all other episode types are ignored.
 *
 * @param anime
 *            search result whose id is used as AniDB anime id
 * @param sortOrder
 *            if {@code AbsoluteAirdate}, normal episodes with a known airdate are numbered by date (yyyyMMdd)
 * @param locale
 *            preferred language for series and episode titles (English is used as fall-back for episode titles)
 * @return series info and episodes sorted with {@code episodeComparator()}
 * @throws IllegalStateException
 *             if the XML response contains a non-empty {@code <error>} element (e.g. Banned)
 */
@Override
protected SeriesData fetchSeriesData(SearchResult anime, SortOrder sortOrder, Locale locale) throws Exception {
    // get anime page as xml
    Document dom = getXmlResource(anime.getId());

    // check for errors (e.g. <error>Banned</error>)
    String error = selectString("/error", dom);
    if (error != null && error.length() > 0) {
        throw new IllegalStateException(String.format("%s error: %s", getName(), error));
    }

    // parse series info
    SeriesInfo seriesInfo = new SeriesInfo(this, sortOrder, locale, anime.getId(), SeriesInfo.TYPE_ANIME);
    seriesInfo.setAliasNames(anime.getAliasNames());

    // AniDB types: Movie, Music Video, Other, OVA, TV Series, TV Special, Web, unknown
    String animeType = selectString("//type", dom);
    if (animeType != null && animeType.matches("(?i:music.video|unknown)")) {
        return new SeriesData(seriesInfo, emptyList());
    }

    seriesInfo.setName(selectString("anime/titles/title[@type='main']", dom));
    seriesInfo.setRating(getDecimal(selectString("anime/ratings/permanent", dom)));
    seriesInfo.setRatingCount(matchInteger(getTextContent("anime/ratings/permanent/@count", dom)));
    seriesInfo.setStartDate(SimpleDate.parse(selectString("anime/startdate", dom)));

    // add categories ordered by weight as genres
    // * only use categories with weight >= 400
    // * sort by weight (descending)
    // * limit to 5 genres
    seriesInfo.setGenres(streamNodes("anime/categories/category", dom).map(categoryNode -> {
        String name = getTextContent("name", categoryNode);
        Integer weight = matchInteger(getAttribute("weight", categoryNode));
        return new SimpleImmutableEntry<String, Integer>(name, weight);
    }).filter(nw -> {
        return nw.getKey() != null && nw.getValue() != null && nw.getKey().length() > 0 && nw.getValue() >= 400;
    }).sorted((a, b) -> {
        return b.getValue().compareTo(a.getValue());
    }).map(it -> it.getKey()).limit(5).collect(Collectors.toList()));

    // the AniDB language code does not change while parsing, so resolve it only once
    String languageCode = getLanguageCode(locale);

    // parse episode data
    String animeTitle = selectString("anime/titles/title[@type='official' and @lang='" + languageCode + "']", dom);
    if (animeTitle == null || animeTitle.length() == 0) {
        animeTitle = seriesInfo.getName();
    }

    List<Episode> episodes = new ArrayList<Episode>(25);
    for (Node node : selectNodes("anime/episodes/episode", dom)) {
        Node epno = getChild("epno", node);
        int type = Integer.parseInt(getAttribute("type", epno));

        // only normal episodes (1) and specials (2) are of interest
        if (type != 1 && type != 2) {
            continue;
        }

        // parse the number only for episodes that are actually used, so that ignored episode types can never fail the request
        int number = Integer.parseInt(getTextContent(epno).replaceAll("\\D", ""));
        Integer id = Integer.parseInt(getAttribute("id", node));
        SimpleDate airdate = SimpleDate.parse(getTextContent("airdate", node));

        String title = selectString(".//title[@lang='" + languageCode + "']", node);
        if (title == null || title.isEmpty()) { // English language fall-back
            title = selectString(".//title[@lang='en']", node);
        }

        if (type == 1) {
            // adjust for forced absolute numbering (if possible)
            if (sortOrder == SortOrder.AbsoluteAirdate && airdate != null) {
                // use airdate as absolute episode number
                number = airdate.getYear() * 1_00_00 + airdate.getMonth() * 1_00 + airdate.getDay();
            }
            episodes.add(new Episode(animeTitle, null, number, title, number, null, airdate, id, new SeriesInfo(seriesInfo))); // normal episode, no seasons for anime
        } else {
            episodes.add(new Episode(animeTitle, null, null, title, null, number, airdate, id, new SeriesInfo(seriesInfo))); // special episode
        }
    }

    // make sure episodes are ordered correctly
    episodes.sort(episodeComparator());

    // sanity check
    if (episodes.isEmpty()) {
        debug.fine(format("No episode data: %s (%d) => %s", anime, anime.getId(), getResource(anime.getId())));
    }

    return new SeriesData(seriesInfo, episodes);
}
/**
 * Retrieve the AniDB XML document for the given anime id via the monthly cache of this provider.
 *
 * The fetch is wrapped so that a permit is acquired from {@code REQUEST_LIMIT}, and the cached entry is given an expiration of {@code Cache.ONE_WEEK}.
 *
 * @param aid
 *            AniDB anime id
 * @return the XML document for the anime
 */
private Document getXmlResource(int aid) throws Exception {
    return Cache.getCache(getName(), CacheType.Monthly)
            .xml(aid, this::getResource)
            .fetch(withPermit(fetchIfModified(), resource -> REQUEST_LIMIT.acquirePermit()))
            .expire(Cache.ONE_WEEK)
            .get();
}
/**
 * Build the AniDB HTTP API request URL for the given anime id.
 *
 * @param aid
 *            AniDB anime id
 * @return request URL including the client name and client version of this instance
 */
private URL getResource(int aid) throws Exception {
    // e.g. http://api.anidb.net:9001/httpapi?request=anime&client=filebot&clientver=1&protover=1&aid=4521
    String request = "http://api.anidb.net:9001/httpapi?request=anime&client=" + client + "&clientver=" + clientver + "&protover=1&aid=" + aid;
    return new URL(request);
}
/**
 * Link to the AniDB web page of the given anime.
 *
 * @param searchResult
 *            search result whose id is appended to the AniDB base address
 * @return URI of the form {@code http://anidb.net/a<id>}
 */
@Override
public URI getEpisodeListLink(SearchResult searchResult) {
    String link = "http://anidb.net/a" + searchResult.getId();
    return URI.create(link);
}
/**
 * Map locale to AniDB language code
 *
 * @param locale
 *            locale whose language is to be mapped
 * @return {@code "he"} for Java language code {@code "iw"}, {@code "id"} for {@code "in"}, and the unchanged Java language code otherwise
 */
public String getLanguageCode(Locale locale) {
    // Note: ISO 639 is not a stable standard; some language codes have changed over time.
    // Locale accepts both the new and the old codes for those languages,
    // but getLanguage() may still report the old code, which is translated below.
    String javaCode = locale.getLanguage();

    // Java language code => AniDB language code
    if (javaCode.equals("iw")) {
        return "he"; // Hebrew
    }
    if (javaCode.equals("in")) {
        return "id"; // Indonesian
    }
    return javaCode;
}
/**
 * Look up the picture of the given anime.
 *
 * @param id
 *            AniDB anime id
 * @param category
 *            not used by this implementation
 * @param locale
 *            not used by this implementation
 * @return a list with a single "picture" artwork, or an empty list if the anime document names no picture
 */
@Override
public List<Artwork> getArtwork(int id, String category, Locale locale) throws Exception {
    Document dom = getXmlResource(id);
    String picture = selectString("anime/picture", dom);

    List<Artwork> artwork = new ArrayList<Artwork>(1);

    // guard against null as well as empty, like the other selectString results checked in this class
    if (picture != null && picture.length() > 0) {
        URL url = new URL("https://img7.anidb.net/pics/anime/" + picture);
        artwork.add(new Artwork(Stream.of("picture"), url, null, null));
    }

    return artwork;
}
/**
 * Load the AniDB anime title index and turn it into one search result per anime.
 *
 * Only titles in the languages x-jat, en and ja are used. For each anime the titles are ordered by title type (primary, official, synonym, short), then by language (x-jat, en, ja), then alphabetically; the first title becomes the name of the search result and all remaining titles become alias names.
 *
 * This method is overridden in {@link net.filebot.WebServices.AnidbClientWithLocalSearch} to fetch the Anime Index from our own host and not anidb.net
 *
 * @return search results for all anime found in the title index, in no particular order
 */
public SearchResult[] getAnimeTitles() throws Exception {
    // get data file (unzip and cache)
    byte[] bytes = getCache("root").bytes("anime-titles.dat.gz", n -> new URL("http://anidb.net/api/" + n)).get();

    // <aid>|<type>|<language>|<title>
    // type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
    Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");

    // position in these lists defines the sort priority of a title
    List<String> languageOrder = new ArrayList<String>();
    languageOrder.add("x-jat");
    languageOrder.add("en");
    languageOrder.add("ja");

    List<String> typeOrder = new ArrayList<String>();
    typeOrder.add("1");
    typeOrder.add("4");
    typeOrder.add("2");
    typeOrder.add("3");

    // fetch data: each entry is { type priority (Integer), language priority (Integer), title (String) }
    Map<Integer, List<Object[]>> entriesByAnime = new HashMap<Integer, List<Object[]>>(65536);

    try (BufferedReader text = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(bytes), UTF_8))) {
        for (String line = text.readLine(); line != null; line = text.readLine()) {
            Matcher matcher = pattern.matcher(line);
            if (!matcher.matches()) {
                continue;
            }

            int aid = Integer.parseInt(matcher.group(1));
            String type = matcher.group(2);
            String language = matcher.group(3);
            String title = matcher.group(4);

            if (aid <= 0 || title.length() == 0 || !typeOrder.contains(type) || !languageOrder.contains(language)) {
                continue;
            }

            // resolve HTML entities
            title = Jsoup.parse(title).text();

            // the title may have become empty only after resolving markup, so apply the non-empty check again
            if (title.isEmpty()) {
                continue;
            }

            // short titles are only kept if they are at least 5 characters long, start with an upper-case character and do not end with one
            if (type.equals("3") && (title.length() < 5 || !Character.isUpperCase(title.charAt(0)) || Character.isUpperCase(title.charAt(title.length() - 1)))) {
                continue;
            }

            entriesByAnime.computeIfAbsent(aid, k -> new ArrayList<Object[]>()).add(new Object[] { typeOrder.indexOf(type), languageOrder.indexOf(language), title });
        }
    }

    // build up a list of all possible AniDB search results
    return entriesByAnime.entrySet().stream().map(it -> {
        List<String> names = it.getValue().stream().sorted((a, b) -> {
            // compare by type priority, then language priority, then title
            int order = Integer.compare((Integer) a[0], (Integer) b[0]);
            if (order == 0) {
                order = Integer.compare((Integer) a[1], (Integer) b[1]);
            }
            if (order == 0) {
                order = ((String) a[2]).compareTo((String) b[2]);
            }
            return order;
        }).map(n -> (String) n[2]).collect(toList());

        // every map value holds at least one entry, so there is always a first name
        String primaryTitle = names.get(0);
        List<String> aliasNames = names.subList(1, names.size());
        return new SearchResult(it.getKey(), primaryTitle, aliasNames);
    }).toArray(SearchResult[]::new);
}
}