mirror of
https://github.com/mitb-archive/filebot
synced 2024-12-23 08:18:52 -05:00
* use xml anime page to get episode information
This commit is contained in:
parent
02fc6180ab
commit
cdf2487f2c
@ -14,6 +14,8 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.swing.Icon;
|
||||
|
||||
@ -102,20 +104,45 @@ public class AnidbClient implements EpisodeListProvider {
|
||||
}
|
||||
|
||||
|
||||
protected String selectOfficialTitle(Document animePage, String languageName) {
|
||||
// create xpath query for official title of the given language
|
||||
// e.g. //*[@class='data']//*[contains(@class, 'official') and .//*[contains(@title, 'english')]]//LABEL
|
||||
@Override
|
||||
public List<Episode> getEpisodeList(SearchResult searchResult) throws IOException, SAXException {
|
||||
int aid = getAnimeID(getEpisodeListLink(searchResult));
|
||||
|
||||
String condition = String.format(".//*[contains(@title, '%s')]", languageName.toLowerCase());
|
||||
String xpath = String.format("//*[@class='data']//*[contains(@class, 'official') and %s]//LABEL", condition);
|
||||
// get anime page as xml
|
||||
Document dom = getDocument(new URL("http", host, "/perl-bin/animedb.pl?show=xml&t=anime&aid=" + aid));
|
||||
|
||||
return selectString(xpath, animePage);
|
||||
// select main title
|
||||
String animeTitle = selectString("//anime/titles/title[@type='main']/text()", dom);
|
||||
|
||||
List<Episode> episodes = new ArrayList<Episode>(25);
|
||||
|
||||
for (Node node : selectNodes("//anime/eps/ep", dom)) {
|
||||
String flags = getTextContent("flags", node);
|
||||
|
||||
// allow only normal and recap episodes
|
||||
if (flags == null || flags.equals("2")) {
|
||||
String number = getTextContent("epno", node);
|
||||
String title = selectString(".//title[@lang='en']", node);
|
||||
|
||||
// no seasons for anime
|
||||
episodes.add(new Episode(animeTitle, null, number, title));
|
||||
}
|
||||
}
|
||||
|
||||
// sanity check
|
||||
if (episodes.isEmpty()) {
|
||||
// anime page xml doesn't work sometimes
|
||||
Logger.getLogger(getClass().getName()).warning(String.format("Failed to parse episode data from xml: %s (%d)", searchResult, aid));
|
||||
|
||||
// fall back to good old page scraper
|
||||
return scrapeEpisodeList(searchResult);
|
||||
}
|
||||
|
||||
return episodes;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Episode> getEpisodeList(SearchResult searchResult) throws IOException, SAXException {
|
||||
|
||||
protected List<Episode> scrapeEpisodeList(SearchResult searchResult) throws IOException, SAXException {
|
||||
Document dom = getHtmlDocument(getEpisodeListLink(searchResult).toURL());
|
||||
|
||||
// use title from anime page
|
||||
@ -142,6 +169,30 @@ public class AnidbClient implements EpisodeListProvider {
|
||||
}
|
||||
|
||||
|
||||
protected int getAnimeID(URI uri) {
|
||||
// e.g. http://anidb.net/perl-bin/animedb.pl?show=anime&aid=26
|
||||
if (uri.getQuery() != null) {
|
||||
Matcher query = Pattern.compile("aid=(\\d+)").matcher(uri.getQuery());
|
||||
|
||||
if (query.find()) {
|
||||
return Integer.parseInt(query.group(1));
|
||||
}
|
||||
}
|
||||
|
||||
// e.g. http://anidb.net/a26
|
||||
if (uri.getPath() != null) {
|
||||
Matcher path = Pattern.compile("/a(\\d+)$").matcher(uri.getPath());
|
||||
|
||||
if (path.find()) {
|
||||
return Integer.parseInt(path.group(1));
|
||||
}
|
||||
}
|
||||
|
||||
// no aid found
|
||||
throw new IllegalArgumentException("URI does not contain an aid: " + uri);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public URI getEpisodeListLink(SearchResult searchResult) {
|
||||
return ((HyperLink) searchResult).toURI();
|
||||
|
@ -75,18 +75,23 @@ public final class WebRequest {
|
||||
}
|
||||
|
||||
|
||||
public static Document getDocument(URL url) throws SAXException, IOException, ParserConfigurationException {
|
||||
return getDocument(url.toString());
|
||||
}
|
||||
|
||||
|
||||
public static Document getDocument(String url) throws SAXException, IOException, ParserConfigurationException {
|
||||
return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(url);
|
||||
public static Document getDocument(URL url) throws IOException, SAXException {
|
||||
return getDocument(new InputSource(getReader(url.openConnection())));
|
||||
}
|
||||
|
||||
|
||||
public static Document getDocument(InputStream inputStream) throws SAXException, IOException, ParserConfigurationException {
|
||||
return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputStream);
|
||||
return getDocument(new InputSource(inputStream));
|
||||
}
|
||||
|
||||
|
||||
public static Document getDocument(InputSource source) throws IOException, SAXException {
|
||||
try {
|
||||
return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(source);
|
||||
} catch (ParserConfigurationException e) {
|
||||
// will never happen
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -129,15 +134,12 @@ public final class WebRequest {
|
||||
private static Charset getCharset(String contentType) {
|
||||
if (contentType != null) {
|
||||
// e.g. Content-Type: text/html; charset=iso-8859-1
|
||||
Pattern pattern = Pattern.compile(".*;\\s*charset=(\\S+).*", Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = pattern.matcher(contentType);
|
||||
Matcher matcher = Pattern.compile("charset=(\\p{Graph}+)").matcher(contentType);
|
||||
|
||||
if (matcher.matches()) {
|
||||
String charsetName = matcher.group(1);
|
||||
|
||||
if (matcher.find()) {
|
||||
try {
|
||||
return Charset.forName(charsetName);
|
||||
} catch (Exception e) {
|
||||
return Charset.forName(matcher.group(1));
|
||||
} catch (IllegalArgumentException e) {
|
||||
Logger.getLogger(WebRequest.class.getName()).log(Level.WARNING, e.getMessage());
|
||||
}
|
||||
}
|
||||
@ -148,6 +150,9 @@ public final class WebRequest {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Dummy constructor to prevent instantiation.
|
||||
*/
|
||||
private WebRequest() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
@ -129,18 +129,6 @@ public class AnidbClientTest {
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void selectEnglishTitle() throws Exception {
|
||||
assertEquals("Banner of the Stars", anidb.selectOfficialTitle(getHtmlDocument(new URL("http://anidb.net/a4")), "English"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void selectJapaneseTitle() throws Exception {
|
||||
assertEquals("十二国記", anidb.selectOfficialTitle(getHtmlDocument(twelvekingdomsSearchResult.getURL()), "Japanese"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListLink() throws Exception {
|
||||
assertEquals(monsterSearchResult.getURL().toString(), anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());
|
||||
|
Loading…
Reference in New Issue
Block a user