* use "matching title" in anidb search results

* use official english anime title
* much faster episode information extraction (less xpath)
This commit is contained in:
Reinhard Pointner 2009-05-25 20:13:30 +00:00
parent 7601be3b46
commit ec4254e687
3 changed files with 100 additions and 33 deletions

View File

@ -281,11 +281,14 @@ public class SeriesNameMatcher {
}
protected String[] names(List<File> files) {
protected String[] names(Collection<File> files) {
String[] names = new String[files.size()];
for (int i = 0; i < names.length; i++) {
names[i] = FileUtilities.getName(files.get(i));
int i = 0;
// fill array
for (File file : files) {
names[i++] = FileUtilities.getName(file);
}
return names;

View File

@ -12,6 +12,7 @@ import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
@ -43,47 +44,80 @@ public class AnidbClient implements EpisodeListProvider {
@Override
public List<SearchResult> search(String query) throws IOException, SAXException {
// Air Status: ignore
// Anime Type: TV Series, TV Special, OVA
// Hide Synonyms: true
URL searchUrl = new URL("http", host, "/perl-bin/animedb.pl?type.tvspecial=1&type.tvseries=1&type.ova=1&show=animelist&orderby.name=0.1&noalias=1&do.update=update&adb.search=" + URLEncoder.encode(query, "UTF-8"));
Document dom = getHtmlDocument(searchUrl);
List<Node> nodes = selectNodes("//TABLE[@class='animelist']//TR/TD/ancestor::TR", dom);
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
List<SearchResult> results = new ArrayList<SearchResult>(nodes.size());
for (Node node : nodes) {
Node titleNode = selectNode("./TD[@class='name']/A", node);
Node link = selectNode("./TD[@class='name']/A", node);
String title = getTextContent(titleNode);
String href = getAttribute("href", titleNode);
// prefer title that is similar to the search query
String title = selectString("./following-sibling::*[@class='match']", link);
// remove leading and trailing parenthesis
title = title.replaceAll("(^\\()|(\\)$)", "");
if (title.isEmpty()) {
// fallback: use main title
title = getTextContent(link);
}
// anime page
String href = getAttribute("href", link);
try {
searchResults.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
results.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
} catch (MalformedURLException e) {
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href);
}
}
// we might have been redirected to the episode list page
if (searchResults.isEmpty()) {
// check if current page contains an episode list
if (exists("//TABLE[@class='eplist']", dom)) {
// get show's name from the document
String header = selectString("id('layout-content')//H1[1]", dom);
String name = header.replaceFirst("Anime:\\s*", "");
String episodeListUrl = selectString("id('layout-main')//DIV[@class='data']//A[@class='short_link']/@href", dom);
try {
searchResults.add(new HyperLink(name, new URL(episodeListUrl)));
} catch (MalformedURLException e) {
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + episodeListUrl);
}
if (results.isEmpty()) {
// get anime information from document
String title = selectTitle(dom);
String link = selectString("//*[@class='data']//A[@class='short_link']/@href", dom);
try {
// insert single entry
results.add(new HyperLink(title, new URL(link)));
} catch (MalformedURLException e) {
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + link);
}
}
return searchResults;
return results;
}
/**
 * Select the title to display for an anime, given its anime page.
 *
 * The official english title (as found by
 * {@link #selectOfficialTitle(Document, Locale)}) is preferred. If that lookup
 * yields an empty string, the title is taken from the page header instead,
 * with the first "Anime:" label and any whitespace following it removed.
 *
 * @param animePage parsed anime page document
 * @return the official english title, or the header-derived title as fallback
 */
protected String selectTitle(Document animePage) {
	// prefer official english title
	String title = selectOfficialTitle(animePage, Locale.ENGLISH);

	if (title.isEmpty()) {
		// fallback: extract name from header (e.g. "Anime: Naruto")
		// NOTE(review): presumably selectString yields the text of the first H1 match -- confirm against the helper
		title = selectString("//H1", animePage).replaceFirst("Anime:\\s*", "");
	}

	return title;
}
/**
 * Select the official title in the given language from an anime page.
 *
 * The lookup builds an XPath query from the lower-cased english display name
 * of the language, e.g. for {@link Locale#ENGLISH}:
 * <pre>
 * //*[@class='data']//*[contains(@class, 'official') and .//*[contains(@title, 'english')]]//LABEL
 * </pre>
 *
 * @param animePage parsed anime page document
 * @param language language of the official title to look for
 * @return whatever selectString yields for the query; callers in this class
 *         treat an empty string as "no official title in that language"
 */
protected String selectOfficialTitle(Document animePage, Locale language) {
	// Lower-case with a fixed locale: the display name is english text, and the
	// default-locale variant would corrupt names such as "Italian" on JVMs
	// running with a Turkish default locale (I -> dotless i), so the
	// contains(@title, ...) test would silently never match.
	String languageName = language.getDisplayLanguage(Locale.ENGLISH).toLowerCase(Locale.ENGLISH);

	// element that has a descendant whose title attribute mentions the language
	String condition = String.format(".//*[contains(@title, '%s')]", languageName);

	// official title entry within the data section that satisfies the language condition
	String xpath = String.format("//*[@class='data']//*[contains(@class, 'official') and %s]//LABEL", condition);

	return selectString(xpath, animePage);
}
@ -92,22 +126,23 @@ public class AnidbClient implements EpisodeListProvider {
Document dom = getHtmlDocument(getEpisodeListLink(searchResult).toURL());
// use title from anime page
String animeTitle = selectTitle(dom);
List<Node> nodes = selectNodes("id('eplist')//TR/TD/SPAN/ancestor::TR", dom);
ArrayList<Episode> episodes = new ArrayList<Episode>(nodes.size());
for (Node node : nodes) {
String number = selectString("./TD[contains(@class,'id')]/A", node);
String title = selectString("./TD[@class='title']/LABEL/text()", node);
List<Node> columns = getChildren("TD", node);
if (title.startsWith("recap")) {
title = title.replaceFirst("recap", "");
}
String number = columns.get(0).getTextContent().trim();
String title = columns.get(1).getTextContent().trim();
// if number does not match, episode is probably some kind of special (S1, S2, ...)
if (number.matches("\\d+")) {
// no seasons for anime
episodes.add(new Episode(searchResult.getName(), null, number, title));
episodes.add(new Episode(animeTitle, null, number, title));
}
}

View File

@ -2,10 +2,12 @@
package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.*;
import static org.junit.Assert.*;
import java.net.URL;
import java.util.List;
import java.util.Locale;
import org.junit.BeforeClass;
import org.junit.Test;
@ -62,14 +64,25 @@ public class AnidbClientTest {
@Test
public void searchResultPageRedirect() throws Exception {
public void searchReturnMatchingTitle() throws Exception {
	// the same anime is known as Seikai no Senki (main title) and
	// Banner of the Stars (official english title); the first search result
	// is expected to carry whichever title matches the query
	String foundByEnglishTitle = anidb.search("banner of the stars").get(0).getName();
	assertEquals("Banner of the Stars", foundByEnglishTitle);

	String foundByMainTitle = anidb.search("seikai no senki").get(0).getName();
	assertEquals("Seikai no Senki", foundByMainTitle);

	// no matching title
	String foundWithoutMatch = anidb.search("naruto").get(0).getName();
	assertEquals("Naruto", foundWithoutMatch);
}
@Test
public void searchPageRedirect() throws Exception {
List<SearchResult> results = anidb.search("twelve kingdoms");
assertEquals(1, results.size());
HyperLink result = (HyperLink) results.get(0);
assertEquals("Juuni Kokuki", result.getName());
assertEquals("The Twelve Kingdoms", result.getName());
assertEquals("http://anidb.net/a26", result.getURL().toString());
}
@ -97,13 +110,29 @@ public class AnidbClientTest {
Episode first = list.get(0);
assertEquals("Juuni Kokuki", first.getSeriesName());
assertEquals("The Twelve Kingdoms", first.getSeriesName());
assertEquals("Shadow of the Moon, The Sea of Shadow - Chapter 1", first.getTitle());
assertEquals("1", first.getEpisode());
assertEquals(null, first.getSeason());
}
@Test
public void selectTitle() throws Exception {
	// page with an official english title -> that title is used
	URL pageWithEnglishTitle = new URL("http://anidb.net/a4");
	assertEquals("Banner of the Stars", anidb.selectTitle(getHtmlDocument(pageWithEnglishTitle)));

	// page without an official english title -> main title is used
	URL pageWithoutEnglishTitle = new URL("http://anidb.net/a916");
	assertEquals("Turn A Gundam", anidb.selectTitle(getHtmlDocument(pageWithoutEnglishTitle)));
}
@Test
public void selectJapaneseTitle() throws Exception {
	// look up the official japanese title on the anime page of the search result
	String officialTitle = anidb.selectOfficialTitle(getHtmlDocument(twelvekingdomsSearchResult.getURL()), Locale.JAPANESE);

	assertEquals("十二国記", officialTitle);
}
@Test
public void getEpisodeListLink() throws Exception {
assertEquals(monsterSearchResult.getURL().toString(), anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());