* use "matching title" in anidb search results

* use official English anime title
* much faster episode information extraction (less XPath)
2025-03-09 13:59:49 -04:00 · 2009-05-25 20:13:30 +00:00 · 2009-05-25 20:13:30 +00:00 · ec4254e687
commit ec4254e687
parent 7601be3b46
3 changed files with 100 additions and 33 deletions
--- a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
+++ b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
@@ -281,11 +281,14 @@ public class SeriesNameMatcher {
 	}
 	

-	protected String[] names(List<File> files) {
+	protected String[] names(Collection<File> files) {
 		String[] names = new String[files.size()];
 		
-		for (int i = 0; i < names.length; i++) {
-			names[i] = FileUtilities.getName(files.get(i));
+		int i = 0;
+		
+		// fill array
+		for (File file : files) {
+			names[i++] = FileUtilities.getName(file);
 		}
 		
 		return names;
--- a/source/net/sourceforge/filebot/web/AnidbClient.java
+++ b/source/net/sourceforge/filebot/web/AnidbClient.java
@@ -12,6 +12,7 @@ import java.net.URL;
 import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.logging.Level;
 import java.util.logging.Logger;

@@ -43,47 +44,80 @@ public class AnidbClient implements EpisodeListProvider {

 	@Override
 	public List<SearchResult> search(String query) throws IOException, SAXException {
-		
+		// Air Status: ignore
+		// Anime Type: TV Series, TV Special, OVA
+		// Hide Synonyms: true
 		URL searchUrl = new URL("http", host, "/perl-bin/animedb.pl?type.tvspecial=1&type.tvseries=1&type.ova=1&show=animelist&orderby.name=0.1&noalias=1&do.update=update&adb.search=" + URLEncoder.encode(query, "UTF-8"));
 		
 		Document dom = getHtmlDocument(searchUrl);
 		
 		List<Node> nodes = selectNodes("//TABLE[@class='animelist']//TR/TD/ancestor::TR", dom);
 		
-		List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
+		List<SearchResult> results = new ArrayList<SearchResult>(nodes.size());
 		
 		for (Node node : nodes) {
-			Node titleNode = selectNode("./TD[@class='name']/A", node);
+			Node link = selectNode("./TD[@class='name']/A", node);
 			
-			String title = getTextContent(titleNode);
-			String href = getAttribute("href", titleNode);
+			// prefer title that is similar to the search query
+			String title = selectString("./following-sibling::*[@class='match']", link);
+			
+			// remove leading and trailing parenthesis
+			title = title.replaceAll("(^\\()|(\\)$)", "");
+			
+			if (title.isEmpty()) {
+				// fallback: use main title
+				title = getTextContent(link);
+			}
+			
+			// anime page
+			String href = getAttribute("href", link);
 			
 			try {
-				searchResults.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
+				results.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
 			} catch (MalformedURLException e) {
 				Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href);
 			}
 		}
 		
 		// we might have been redirected to the episode list page
-		if (searchResults.isEmpty()) {
-			// check if current page contains an episode list
-			if (exists("//TABLE[@class='eplist']", dom)) {
-				// get show's name from the document
-				String header = selectString("id('layout-content')//H1[1]", dom);
-				String name = header.replaceFirst("Anime:\\s*", "");
-				
-				String episodeListUrl = selectString("id('layout-main')//DIV[@class='data']//A[@class='short_link']/@href", dom);
-				
-				try {
-					searchResults.add(new HyperLink(name, new URL(episodeListUrl)));
-				} catch (MalformedURLException e) {
-					Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + episodeListUrl);
-				}
+		if (results.isEmpty()) {
+			// get anime information from document
+			String title = selectTitle(dom);
+			String link = selectString("//*[@class='data']//A[@class='short_link']/@href", dom);
+			
+			try {
+				// insert single entry
+				results.add(new HyperLink(title, new URL(link)));
+			} catch (MalformedURLException e) {
+				Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid location: " + link);
 			}
 		}
 		
-		return searchResults;
+		return results;
+	}
+	
+
+	protected String selectTitle(Document animePage) {
+		// prefer official english title
+		String title = selectOfficialTitle(animePage, Locale.ENGLISH);
+		
+		if (title.isEmpty()) {
+			// fallback: extract name from header (e.g. "Anime: Naruto")
+			title = selectString("//H1", animePage).replaceFirst("Anime:\\s*", "");;
+		}
+		
+		return title;
+	}
+	
+
+	protected String selectOfficialTitle(Document animePage, Locale language) {
+		// create xpath query for official title of the given language
+		// e.g. //*[@class='data']//*[contains(@class, 'official') and .//*[contains(@title, 'english')]]//LABEL
+		
+		String condition = String.format(".//*[contains(@title, '%s')]", language.getDisplayLanguage(Locale.ENGLISH).toLowerCase());
+		String xpath = String.format("//*[@class='data']//*[contains(@class, 'official') and %s]//LABEL", condition);
+		
+		return selectString(xpath, animePage);
 	}
 	

@@ -92,22 +126,23 @@ public class AnidbClient implements EpisodeListProvider {
 		
 		Document dom = getHtmlDocument(getEpisodeListLink(searchResult).toURL());
 		
+		// use title from anime page
+		String animeTitle = selectTitle(dom);
+		
 		List<Node> nodes = selectNodes("id('eplist')//TR/TD/SPAN/ancestor::TR", dom);
 		
 		ArrayList<Episode> episodes = new ArrayList<Episode>(nodes.size());
 		
 		for (Node node : nodes) {
-			String number = selectString("./TD[contains(@class,'id')]/A", node);
-			String title = selectString("./TD[@class='title']/LABEL/text()", node);
+			List<Node> columns = getChildren("TD", node);
 			
-			if (title.startsWith("recap")) {
-				title = title.replaceFirst("recap", "");
-			}
+			String number = columns.get(0).getTextContent().trim();
+			String title = columns.get(1).getTextContent().trim();
 			
 			// if number does not match, episode is probably some kind of special (S1, S2, ...)
 			if (number.matches("\\d+")) {
 				// no seasons for anime
-				episodes.add(new Episode(searchResult.getName(), null, number, title));
+				episodes.add(new Episode(animeTitle, null, number, title));
 			}
 		}
 		
--- a/test/net/sourceforge/filebot/web/AnidbClientTest.java
+++ b/test/net/sourceforge/filebot/web/AnidbClientTest.java
@@ -2,10 +2,12 @@
 package net.sourceforge.filebot.web;


+import static net.sourceforge.filebot.web.WebRequest.*;
 import static org.junit.Assert.*;

 import java.net.URL;
 import java.util.List;
+import java.util.Locale;

 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -62,14 +64,25 @@ public class AnidbClientTest {
 	

 	@Test
-	public void searchResultPageRedirect() throws Exception {
+	public void searchReturnMatchingTitle() throws Exception {
+		// Seikai no Senki (main title), Banner of the Stars (official english title)
+		assertEquals("Banner of the Stars", anidb.search("banner of the stars").get(0).getName());
+		assertEquals("Seikai no Senki", anidb.search("seikai no senki").get(0).getName());
+		
+		// no matching title
+		assertEquals("Naruto", anidb.search("naruto").get(0).getName());
+	}
+	
+
+	@Test
+	public void searchPageRedirect() throws Exception {
 		List<SearchResult> results = anidb.search("twelve kingdoms");
 		
 		assertEquals(1, results.size());
 		
 		HyperLink result = (HyperLink) results.get(0);
 		
-		assertEquals("Juuni Kokuki", result.getName());
+		assertEquals("The Twelve Kingdoms", result.getName());
 		assertEquals("http://anidb.net/a26", result.getURL().toString());
 	}
 	
@@ -97,13 +110,29 @@ public class AnidbClientTest {
 		
 		Episode first = list.get(0);
 		
-		assertEquals("Juuni Kokuki", first.getSeriesName());
+		assertEquals("The Twelve Kingdoms", first.getSeriesName());
 		assertEquals("Shadow of the Moon, The Sea of Shadow - Chapter 1", first.getTitle());
 		assertEquals("1", first.getEpisode());
 		assertEquals(null, first.getSeason());
 	}
 	

+	@Test
+	public void selectTitle() throws Exception {
+		// use official english title
+		assertEquals("Banner of the Stars", anidb.selectTitle(getHtmlDocument(new URL("http://anidb.net/a4"))));
+		
+		// official english title not available -> use main title
+		assertEquals("Turn A Gundam", anidb.selectTitle(getHtmlDocument(new URL("http://anidb.net/a916"))));
+	}
+	
+
+	@Test
+	public void selectJapaneseTitle() throws Exception {
+		assertEquals("十二国記", anidb.selectOfficialTitle(getHtmlDocument(twelvekingdomsSearchResult.getURL()), Locale.JAPANESE));
+	}
+	
+
 	@Test
 	public void getEpisodeListLink() throws Exception {
 		assertEquals(monsterSearchResult.getURL().toString(), anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());