2007-12-23 14:28:04 -05:00
package net.sourceforge.filebot.web ;
2013-09-11 13:22:00 -04:00
import static net.sourceforge.filebot.web.EpisodeUtilities.* ;
import static net.sourceforge.filebot.web.WebRequest.* ;
import static net.sourceforge.tuned.XPathUtilities.* ;
2009-01-04 13:28:28 -05:00
2008-06-21 15:24:18 -04:00
import java.net.URI ;
2009-10-28 11:09:47 -04:00
import java.net.URISyntaxException ;
2007-12-23 14:28:04 -05:00
import java.net.URL ;
import java.util.ArrayList ;
2013-12-27 17:49:56 -05:00
import java.util.Collections ;
import java.util.Comparator ;
2009-10-28 11:09:47 -04:00
import java.util.HashMap ;
2007-12-23 14:28:04 -05:00
import java.util.List ;
2010-11-11 09:23:59 -05:00
import java.util.Locale ;
2009-10-28 11:09:47 -04:00
import java.util.Map ;
2012-01-03 04:18:32 -05:00
import java.util.Map.Entry ;
2009-10-28 11:09:47 -04:00
import java.util.Scanner ;
2011-11-04 01:23:23 -04:00
import java.util.Set ;
2013-03-29 23:44:24 -04:00
import java.util.concurrent.TimeUnit ;
2013-12-27 13:16:08 -05:00
import java.util.logging.Level ;
import java.util.logging.Logger ;
2009-07-13 08:40:27 -04:00
import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
2009-10-28 11:09:47 -04:00
import java.util.zip.GZIPInputStream ;
2007-12-23 14:28:04 -05:00
2008-07-13 13:59:05 -04:00
import javax.swing.Icon ;
2012-07-25 00:34:20 -04:00
import net.sourceforge.filebot.Cache ;
import net.sourceforge.filebot.ResourceManager ;
2013-12-27 17:49:56 -05:00
import org.jsoup.Jsoup ;
2007-12-23 14:28:04 -05:00
import org.w3c.dom.Document ;
import org.w3c.dom.Node ;
2011-08-08 13:37:45 -04:00
public class AnidbClient extends AbstractEpisodeListProvider {
2013-09-07 11:48:24 -04:00
2013-03-29 23:44:24 -04:00
private static final FloodLimit REQUEST_LIMIT = new FloodLimit ( 5 , 12 , TimeUnit . SECONDS ) ; // no more than 5 requests within a 10 second window (+2 seconds for good measure)
2013-09-07 11:48:24 -04:00
2011-11-13 13:22:50 -05:00
private final String host = " anidb.net " ;
2013-09-07 11:48:24 -04:00
2011-01-06 22:51:08 -05:00
private final String client ;
private final int clientver ;
2013-09-07 11:48:24 -04:00
2011-01-06 22:51:08 -05:00
public AnidbClient ( String client , int clientver ) {
this . client = client ;
this . clientver = clientver ;
}
2013-09-07 11:48:24 -04:00
2008-07-13 13:59:05 -04:00
@Override
public String getName ( ) {
return " AniDB " ;
}
2013-09-07 11:48:24 -04:00
2008-07-13 13:59:05 -04:00
@Override
public Icon getIcon ( ) {
return ResourceManager . getIcon ( " search.anidb " ) ;
}
2013-09-07 11:48:24 -04:00
2007-12-23 14:28:04 -05:00
@Override
2011-08-08 13:37:45 -04:00
public boolean hasSingleSeasonSupport ( ) {
return false ;
}
2013-09-07 11:48:24 -04:00
2011-08-08 13:37:45 -04:00
@Override
public boolean hasLocaleSupport ( ) {
return true ;
}
2013-09-07 11:48:24 -04:00
2011-11-26 04:44:49 -05:00
@Override
public ResultCache getCache ( ) {
2013-03-17 10:19:11 -04:00
return new ResultCache ( host , Cache . getCache ( " web-datasource-lv2 " ) ) ;
2011-11-26 04:44:49 -05:00
}
2013-09-07 11:48:24 -04:00
2011-08-08 13:37:45 -04:00
@Override
2011-11-04 01:23:23 -04:00
public List < SearchResult > search ( String query , final Locale locale ) throws Exception {
2011-11-26 04:44:49 -05:00
// bypass automatic caching since search is based on locally cached data anyway
return fetchSearchResult ( query , locale ) ;
}
2013-09-07 11:48:24 -04:00
2011-11-26 04:44:49 -05:00
@Override
public List < SearchResult > fetchSearchResult ( String query , final Locale locale ) throws Exception {
2013-09-07 11:48:24 -04:00
LocalSearch < SearchResult > index = new LocalSearch < SearchResult > ( getAnimeTitles ( ) ) {
2009-10-28 11:09:47 -04:00
@Override
2013-09-07 11:48:24 -04:00
protected Set < String > getFields ( SearchResult it ) {
2013-10-13 10:50:45 -04:00
return set ( it . getEffectiveNames ( ) ) ;
2009-10-28 11:09:47 -04:00
}
} ;
2013-09-07 11:48:24 -04:00
2011-11-04 01:23:23 -04:00
return new ArrayList < SearchResult > ( index . search ( query ) ) ;
2009-05-25 16:13:30 -04:00
}
2013-09-07 11:48:24 -04:00
2009-07-13 08:40:27 -04:00
@Override
2012-02-13 04:54:57 -05:00
public List < Episode > fetchEpisodeList ( SearchResult searchResult , SortOrder sortOrder , Locale language ) throws Exception {
2011-08-08 13:37:45 -04:00
AnidbSearchResult anime = ( AnidbSearchResult ) searchResult ;
2013-09-07 11:48:24 -04:00
2011-01-06 22:51:08 -05:00
// e.g. http://api.anidb.net:9001/httpapi?request=anime&client=filebot&clientver=1&protover=1&aid=4521
URL url = new URL ( " http " , " api. " + host , 9001 , " /httpapi?request=anime&client= " + client + " &clientver= " + clientver + " &protover=1&aid= " + anime . getAnimeId ( ) ) ;
2013-09-07 11:48:24 -04:00
2013-03-29 23:44:24 -04:00
// respect flood protection limits
REQUEST_LIMIT . acquirePermit ( ) ;
2013-09-07 11:48:24 -04:00
2009-07-13 08:40:27 -04:00
// get anime page as xml
2011-01-06 22:51:08 -05:00
Document dom = getDocument ( url ) ;
2013-09-07 11:48:24 -04:00
2011-10-01 00:08:46 -04:00
// select main title and anime start date
Date seriesStartDate = Date . parse ( selectString ( " //startdate " , dom ) , " yyyy-MM-dd " ) ;
2011-08-08 13:37:45 -04:00
String animeTitle = selectString ( " //titles/title[@type='official' and @lang=' " + language . getLanguage ( ) + " '] " , dom ) ;
if ( animeTitle . isEmpty ( ) ) {
animeTitle = selectString ( " //titles/title[@type='main'] " , dom ) ;
}
2013-09-07 11:48:24 -04:00
2011-11-26 04:44:49 -05:00
List < Episode > episodes = new ArrayList < Episode > ( 25 ) ;
2013-09-07 11:48:24 -04:00
2011-01-06 22:51:08 -05:00
for ( Node node : selectNodes ( " //episode " , dom ) ) {
2013-05-06 04:21:20 -04:00
Node epno = getChild ( " epno " , node ) ;
int number = Integer . parseInt ( getTextContent ( epno ) . replaceAll ( " \\ D " , " " ) ) ;
int type = Integer . parseInt ( getAttribute ( " type " , epno ) ) ;
2013-09-07 11:48:24 -04:00
2013-05-06 04:21:20 -04:00
if ( type = = 1 | | type = = 2 ) {
2011-10-01 00:08:46 -04:00
Date airdate = Date . parse ( getTextContent ( " airdate " , node ) , " yyyy-MM-dd " ) ;
2010-11-11 09:23:59 -05:00
String title = selectString ( " .//title[@lang=' " + language . getLanguage ( ) + " '] " , node ) ;
2011-08-08 13:37:45 -04:00
if ( title . isEmpty ( ) ) { // English language fall-back
title = selectString ( " .//title[@lang='en'] " , node ) ;
}
2013-09-07 11:48:24 -04:00
2013-05-06 04:21:20 -04:00
if ( type = = 1 ) {
2013-07-13 06:01:33 -04:00
episodes . add ( new Episode ( animeTitle , seriesStartDate , null , number , title , number , null , airdate , searchResult ) ) ; // normal episode, no seasons for anime
2013-05-06 04:21:20 -04:00
} else {
2013-07-13 06:01:33 -04:00
episodes . add ( new Episode ( animeTitle , seriesStartDate , null , null , title , null , number , airdate , searchResult ) ) ; // special episode
2013-05-06 04:21:20 -04:00
}
2009-07-13 08:40:27 -04:00
}
}
2013-09-07 11:48:24 -04:00
2011-01-06 22:51:08 -05:00
// make sure episodes are in ordered correctly
sortEpisodes ( episodes ) ;
2013-09-07 11:48:24 -04:00
// sanity check
2011-11-26 04:44:49 -05:00
if ( episodes . isEmpty ( ) ) {
2009-07-13 08:40:27 -04:00
// anime page xml doesn't work sometimes
2013-12-27 13:16:08 -05:00
Logger . getLogger ( AnidbClient . class . getName ( ) ) . log ( Level . WARNING , String . format ( " Unable to parse any episode data from xml: %s (%d) " , anime , anime . getAnimeId ( ) ) ) ;
2009-07-13 08:40:27 -04:00
}
2013-09-07 11:48:24 -04:00
2009-07-13 08:40:27 -04:00
return episodes ;
2007-12-23 14:28:04 -05:00
}
2013-09-07 11:48:24 -04:00
2008-07-05 23:17:23 -04:00
@Override
public URI getEpisodeListLink ( SearchResult searchResult ) {
2009-10-28 11:09:47 -04:00
try {
2012-02-13 04:54:57 -05:00
return new URI ( " http " , host , " /a " + ( ( AnidbSearchResult ) searchResult ) . getAnimeId ( ) , null ) ;
2009-10-28 11:09:47 -04:00
} catch ( URISyntaxException e ) {
throw new RuntimeException ( e ) ;
}
2008-07-05 23:17:23 -04:00
}
2013-09-07 11:48:24 -04:00
2012-02-11 09:03:54 -05:00
public synchronized List < AnidbSearchResult > getAnimeTitles ( ) throws Exception {
2013-03-17 10:19:11 -04:00
URL url = new URL ( " http " , host , " /api/anime-titles.dat.gz " ) ;
2011-11-26 04:44:49 -05:00
ResultCache cache = getCache ( ) ;
2013-09-07 11:48:24 -04:00
2012-01-03 04:18:32 -05:00
@SuppressWarnings ( " unchecked " )
2011-11-26 04:44:49 -05:00
List < AnidbSearchResult > anime = ( List ) cache . getSearchResult ( null , Locale . ROOT ) ;
if ( anime ! = null ) {
2010-11-11 09:23:59 -05:00
return anime ;
2011-11-26 04:44:49 -05:00
}
2013-09-07 11:48:24 -04:00
2009-10-28 11:09:47 -04:00
// <aid>|<type>|<language>|<title>
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
Pattern pattern = Pattern . compile ( " ^(?!#)( \\ d+)[|]( \\ d)[|]([ \\ w-]+)[|](.+)$ " ) ;
2013-09-07 11:48:24 -04:00
2013-12-27 17:49:56 -05:00
List < String > languageOrder = new ArrayList < String > ( ) ;
languageOrder . add ( " x-jat " ) ;
languageOrder . add ( " en " ) ;
languageOrder . add ( " ja " ) ;
2013-09-07 11:48:24 -04:00
2009-10-28 11:09:47 -04:00
// fetch data
2013-12-27 17:49:56 -05:00
Map < Integer , List < Object [ ] > > entriesByAnime = new HashMap < Integer , List < Object [ ] > > ( 65536 ) ;
2013-09-07 11:48:24 -04:00
2013-12-27 17:49:56 -05:00
Scanner scanner = new Scanner ( new GZIPInputStream ( url . openStream ( ) ) , " UTF-8 " ) ;
2009-10-28 11:09:47 -04:00
try {
while ( scanner . hasNextLine ( ) ) {
Matcher matcher = pattern . matcher ( scanner . nextLine ( ) ) ;
2013-09-07 11:48:24 -04:00
2009-10-28 11:09:47 -04:00
if ( matcher . matches ( ) ) {
2011-11-04 01:23:23 -04:00
int aid = Integer . parseInt ( matcher . group ( 1 ) ) ;
String type = matcher . group ( 2 ) ;
String language = matcher . group ( 3 ) ;
String title = matcher . group ( 4 ) ;
2013-09-07 11:48:24 -04:00
2013-12-27 17:49:56 -05:00
if ( aid > 0 & & title . length ( ) > 0 & & languageOrder . contains ( language ) ) {
List < Object [ ] > names = entriesByAnime . get ( aid ) ;
if ( names = = null ) {
names = new ArrayList < Object [ ] > ( ) ;
entriesByAnime . put ( aid , names ) ;
2011-11-04 01:23:23 -04:00
}
2013-09-07 11:48:24 -04:00
2013-12-27 17:49:56 -05:00
// resolve HTML entities
title = Jsoup . parse ( title ) . text ( ) ;
names . add ( new Object [ ] { Integer . parseInt ( type ) , languageOrder . indexOf ( language ) , title } ) ;
2009-10-28 11:09:47 -04:00
}
}
}
} finally {
scanner . close ( ) ;
}
2013-09-07 11:48:24 -04:00
2011-11-04 01:23:23 -04:00
// build up a list of all possible AniDB search results
2013-12-27 17:49:56 -05:00
anime = new ArrayList < AnidbSearchResult > ( entriesByAnime . size ( ) ) ;
2013-09-07 11:48:24 -04:00
2013-12-27 17:49:56 -05:00
for ( Entry < Integer , List < Object [ ] > > entry : entriesByAnime . entrySet ( ) ) {
int aid = entry . getKey ( ) ;
List < Object [ ] > triples = entry . getValue ( ) ;
Collections . sort ( triples , new Comparator < Object [ ] > ( ) {
@SuppressWarnings ( { " unchecked " , " rawtypes " } )
@Override
public int compare ( Object [ ] a , Object [ ] b ) {
for ( int i = 0 ; i < a . length ; i + + ) {
if ( ! a [ i ] . equals ( b [ i ] ) )
return ( ( Comparable ) a [ i ] ) . compareTo ( b [ i ] ) ;
}
return 0 ;
}
} ) ;
List < String > names = new ArrayList < String > ( triples . size ( ) ) ;
for ( Object [ ] it : triples ) {
names . add ( ( String ) it [ 2 ] ) ;
2012-03-19 13:16:27 -04:00
}
2013-09-07 11:48:24 -04:00
2013-12-27 17:49:56 -05:00
String primaryTitle = names . get ( 0 ) ;
String [ ] aliasNames = names . subList ( 1 , names . size ( ) ) . toArray ( new String [ 0 ] ) ;
anime . add ( new AnidbSearchResult ( aid , primaryTitle , aliasNames ) ) ;
2009-10-28 11:09:47 -04:00
}
2013-09-07 11:48:24 -04:00
2009-10-28 11:09:47 -04:00
// populate cache
2011-11-26 04:44:49 -05:00
return cache . putSearchResult ( null , Locale . ROOT , anime ) ;
2009-10-28 11:09:47 -04:00
}
2007-12-23 14:28:04 -05:00
}