* better search->redirect->results handling (subscene, anidb)

* refactoring ...
This commit is contained in:
Reinhard Pointner 2008-07-07 23:38:17 +00:00
parent 9eb74e8038
commit c525aa9ea0
9 changed files with 199 additions and 58 deletions

View File

@ -27,9 +27,9 @@ import org.xml.sax.SAXException;
public class AnidbClient extends EpisodeListClient {
private final SearchResultCache cache = new SearchResultCache();
private final SearchResultCache searchResultCache = new SearchResultCache();
private final String host = "anidb.info";
private final String host = "anidb.net";
public AnidbClient() {
@ -39,8 +39,8 @@ public class AnidbClient extends EpisodeListClient {
@Override
public List<SearchResult> search(String searchterm) throws IOException, SAXException {
if (cache.containsKey(searchterm)) {
return Collections.singletonList(cache.get(searchterm));
if (searchResultCache.containsKey(searchterm)) {
return Collections.singletonList(searchResultCache.get(searchterm));
}
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
@ -49,35 +49,40 @@ public class AnidbClient extends EpisodeListClient {
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
if (!nodes.isEmpty()) {
for (Node node : nodes) {
Node titleNode = XPathUtil.selectNode("./TD[@class='name']/A", node);
String title = XPathUtil.selectString(".", titleNode);
String href = XPathUtil.selectString("@href", titleNode);
String path = "/perl-bin/" + href;
try {
searchResults.add(new HyperLink(title, new URL("http", host, path)));
} catch (MalformedURLException e) {
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid href: " + href);
}
}
} else {
// we might have been redirected to the episode list page directly
List<Node> list = XPathUtil.selectNodes("//TABLE[@class='eplist']", dom);
for (Node node : nodes) {
Node titleNode = XPathUtil.selectNode("./TD[@class='name']/A", node);
if (!list.isEmpty()) {
// get show's name from the document
String header = XPathUtil.selectString("id('layout-content')//H1[1]", dom);
String title = header.replaceFirst("Anime:\\s*", "");
searchResults.add(new HyperLink(title, getSearchUrl(searchterm)));
String title = XPathUtil.selectString(".", titleNode);
String href = XPathUtil.selectString("@href", titleNode);
String path = "/perl-bin/" + href;
try {
searchResults.add(new HyperLink(title, new URL("http", host, path)));
} catch (MalformedURLException e) {
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid href: " + href);
}
}
cache.addAll(searchResults);
// we might have been redirected to the episode list page
if (searchResults.isEmpty()) {
// check if current page contains an episode list
if (XPathUtil.exists("//TABLE[@class='eplist']", dom)) {
// get show's name from the document
String header = XPathUtil.selectString("id('layout-content')//H1[1]", dom);
String name = header.replaceFirst("Anime:\\s*", "");
String episodeListUrl = XPathUtil.selectString("id('layout-main')//DIV[@class='data']//A[@class='short_link']/@href", dom);
try {
searchResults.add(new HyperLink(name, new URL(episodeListUrl)));
} catch (MalformedURLException e) {
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid location: " + episodeListUrl);
}
}
}
searchResultCache.addAll(searchResults);
return searchResults;
}
@ -106,12 +111,13 @@ public class AnidbClient extends EpisodeListClient {
try {
// try to format number of episode
number = numberFormat.format(Integer.parseInt(number));
// no seasons for anime
episodes.add(new Episode(searchResult.getName(), null, number, title));
} catch (NumberFormatException ex) {
// leave it be
// ignore node, episode is probably some kind of special (S1, S2, ...)
}
// no seasons for anime
episodes.add(new Episode(searchResult.getName(), null, number, title));
}
return episodes;

View File

@ -32,7 +32,7 @@ import org.xml.sax.SAXException;
public class SubsceneSubtitleClient extends SubtitleClient {
private final SearchResultCache cache = new SearchResultCache();
private final SearchResultCache searchResultCache = new SearchResultCache();
private final Map<String, Integer> languageFilterMap = new ConcurrentHashMap<String, Integer>(50);
@ -46,8 +46,8 @@ public class SubsceneSubtitleClient extends SubtitleClient {
@Override
public List<SearchResult> search(String searchterm) throws IOException, SAXException {
if (cache.containsKey(searchterm)) {
return Collections.singletonList(cache.get(searchterm));
if (searchResultCache.containsKey(searchterm)) {
return Collections.singletonList(searchResultCache.get(searchterm));
}
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
@ -71,7 +71,30 @@ public class SubsceneSubtitleClient extends SubtitleClient {
}
}
cache.addAll(searchResults);
// we might have been redirected to the subtitle list
if (searchResults.isEmpty()) {
int subtitleNodeCount = getSubtitleNodes(dom).size();
// check if document is a subtitle list
if (subtitleNodeCount > 0) {
// get name of current search result
String name = XPathUtil.selectString("id('leftWrapperWide')//H1/text()", dom);
// get current url
String file = XPathUtil.selectString("id('aspnetForm')/@action", dom);
try {
URL url = new URL("http", host, file);
searchResults.add(new SubsceneSearchResult(name, url, subtitleNodeCount));
} catch (MalformedURLException e) {
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid location: " + file, e);
}
}
}
searchResultCache.addAll(searchResults);
return searchResults;
}
@ -130,6 +153,7 @@ public class SubsceneSubtitleClient extends SubtitleClient {
Document subtitleListDocument = getSubtitleListDocument(subtitleListUrl, languageFilter);
// let's update language filters if they are not known yet
if (languageFilterMap.isEmpty()) {
updateLanguageFilterMap(subtitleListDocument);
}
@ -145,13 +169,13 @@ public class SubsceneSubtitleClient extends SubtitleClient {
}
}
return getSubtitleList(subtitleListUrl, languageName, subtitleListDocument);
return getSubtitleList(subtitleListUrl, languageName, getSubtitleNodes(subtitleListDocument));
}
private boolean useFilteredDocument(SearchResult searchResult) {
SubsceneSearchResult sr = (SubsceneSearchResult) searchResult;
return sr.getSubtitleCount() > 100;
return sr.getSubtitleCount() > 50;
}
@ -166,15 +190,18 @@ public class SubsceneSubtitleClient extends SubtitleClient {
}
private List<SubtitleDescriptor> getSubtitleList(URL subtitleListUrl, String languageName, Document subtitleListDocument) {
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@class='filmSubtitleList']//A[@id]//ancestor::TR", subtitleListDocument);
/**
 * Selects the table rows of the subtitle list contained in the given document.
 * <p>
 * The rows are the {@code TR} ancestors of every link that carries an {@code id}
 * attribute inside the {@code filmSubtitleList} table.
 *
 * @param subtitleListDocument the document to run the selection against
 * @return the nodes returned by {@code XPathUtil.selectNodes} for that selection
 */
private List<Node> getSubtitleNodes(Document subtitleListDocument) {
    List<Node> subtitleRows = XPathUtil.selectNodes("//TABLE[@class='filmSubtitleList']//A[@id]//ancestor::TR", subtitleListDocument);
    return subtitleRows;
}
private List<SubtitleDescriptor> getSubtitleList(URL subtitleListUrl, String languageName, List<Node> subtitleNodes) {
Pattern hrefPattern = Pattern.compile("javascript:Subtitle\\((\\d+), '(\\w+)', .*");
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(nodes.size());
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(subtitleNodes.size());
for (Node node : nodes) {
for (Node node : subtitleNodes) {
try {
Node linkNode = XPathUtil.selectFirstNode("./TD[1]/A", node);
String lang = XPathUtil.selectString("./SPAN[1]", linkNode);

View File

@ -31,7 +31,7 @@ import org.xml.sax.SAXException;
public class TVDotComClient extends EpisodeListClient {
private final SearchResultCache cache = new SearchResultCache();
private final SearchResultCache searchResultCache = new SearchResultCache();
private final String host = "www.tv.com";
@ -49,8 +49,8 @@ public class TVDotComClient extends EpisodeListClient {
@Override
public List<SearchResult> search(String searchterm) throws IOException, SAXException {
if (cache.containsKey(searchterm)) {
return Collections.singletonList(cache.get(searchterm));
if (searchResultCache.containsKey(searchterm)) {
return Collections.singletonList(searchResultCache.get(searchterm));
}
Document dom = HtmlUtil.getHtmlDocument(getSearchUrl(searchterm));
@ -72,7 +72,7 @@ public class TVDotComClient extends EpisodeListClient {
}
}
cache.addAll(searchResults);
searchResultCache.addAll(searchResults);
return searchResults;
}

View File

@ -22,7 +22,7 @@ import org.xml.sax.SAXException;
public class TVRageClient extends EpisodeListClient {
private final SearchResultCache cache = new SearchResultCache();
private final SearchResultCache searchResultCache = new SearchResultCache();
private final String host = "www.tvrage.com";
@ -40,8 +40,8 @@ public class TVRageClient extends EpisodeListClient {
@Override
public List<SearchResult> search(String searchterm) throws SAXException, IOException, ParserConfigurationException {
if (cache.containsKey(searchterm)) {
return Collections.singletonList(cache.get(searchterm));
if (searchResultCache.containsKey(searchterm)) {
return Collections.singletonList(searchResultCache.get(searchterm));
}
String searchUri = String.format("http://" + host + "/feeds/search.php?show=" + URLEncoder.encode(searchterm, "UTF-8"));
@ -60,7 +60,7 @@ public class TVRageClient extends EpisodeListClient {
searchResults.add(new TVRageSearchResult(name, showid, link));
}
cache.addAll(searchResults);
searchResultCache.addAll(searchResults);
return searchResults;
}

View File

@ -70,6 +70,11 @@ public class XPathUtil {
}
/**
 * Tests whether the given XPath expression selects anything.
 *
 * @param xpath the XPath expression handed to {@code selectNode}
 * @param node the context object handed to {@code selectNode}
 * @return {@code true} if {@code selectNode(xpath, node)} returns a non-null result,
 *         {@code false} otherwise
 */
public static boolean exists(String xpath, Object node) {
    Object match = selectNode(xpath, node);
    return match != null;
}
/**
 * Compiles an XPath expression string using a freshly created {@link XPathFactory}.
 *
 * @param xpath the XPath expression to compile
 * @return the compiled expression
 * @throws XPathExpressionException if the expression cannot be compiled
 */
private static XPathExpression getXPath(String xpath) throws XPathExpressionException {
    // a new factory is obtained on every call; nothing is cached between invocations
    XPathFactory factory = XPathFactory.newInstance();
    return factory.newXPath().compile(xpath);
}

View File

@ -0,0 +1,88 @@
package net.sourceforge.filebot.web;
import static org.junit.Assert.assertEquals;
import java.net.URL;
import java.util.List;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests for {@link AnidbClient}: searching, the search-to-episode-list redirect,
 * episode list retrieval and episode list links.
 * <p>
 * NOTE(review): the client under test loads documents from the anidb host, so these
 * tests presumably need network access and depend on the site's current data.
 */
public class AnidbClientTest {

    /** Search result addressed through the full {@code animedb.pl} URL. */
    private static HyperLink testResult;

    /** Search result addressed through the short {@code /a26} link. */
    private static HyperLink shortLinkTestResult;

    private AnidbClient anidb = new AnidbClient();


    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        URL monsterUrl = new URL("http://anidb.net/perl-bin/animedb.pl?show=anime&aid=1539");
        testResult = new HyperLink("Monster", monsterUrl);

        URL juuniKokukiUrl = new URL("http://anidb.net/a26");
        shortLinkTestResult = new HyperLink("Juuni Kokuki", juuniKokukiUrl);
    }


    /** The first hit for a regular search carries the expected name and URL. */
    @Test
    public void search() throws Exception {
        List<SearchResult> hits = anidb.search("one piece");
        HyperLink topHit = (HyperLink) hits.get(0);

        assertEquals("One Piece", topHit.getName());
        assertEquals("http://anidb.net/perl-bin/animedb.pl?show=anime&aid=69", topHit.getURL().toString());
    }


    /** A search that lands directly on an episode list page yields exactly one result. */
    @Test
    public void searchResultPageRedirect() throws Exception {
        List<SearchResult> hits = anidb.search("twelve kingdoms");
        assertEquals(1, hits.size());

        HyperLink onlyHit = (HyperLink) hits.get(0);

        assertEquals("Juuni Kokuki", onlyHit.getName());
        assertEquals("http://anidb.net/a26", onlyHit.getURL().toString());
    }


    @Test
    public void getEpisodeListAll() throws Exception {
        List<Episode> episodes = anidb.getEpisodeList(testResult);
        assertEquals(74, episodes.size());

        assertFirstEpisode(episodes, "Monster", "Herr Dr. Tenma");
    }


    @Test
    public void getEpisodeListAllShortLink() throws Exception {
        List<Episode> episodes = anidb.getEpisodeList(shortLinkTestResult);
        assertEquals(45, episodes.size());

        assertFirstEpisode(episodes, "Juuni Kokuki", "Shadow of the Moon, The Sea of Shadow - Chapter 1");
    }


    @Test
    public void getEpisodeListLink() throws Exception {
        String expectedLink = testResult.getURL().toString();
        String actualLink = anidb.getEpisodeListLink(testResult).toURL().toString();

        assertEquals(expectedLink, actualLink);
    }


    /**
     * Checks the first episode of a list: show name, title, episode number "01"
     * and a {@code null} season number.
     */
    private static void assertFirstEpisode(List<Episode> episodes, String showName, String title) {
        Episode first = episodes.get(0);

        assertEquals(showName, first.getShowName());
        assertEquals(title, first.getTitle());
        assertEquals("01", first.getNumberOfEpisode());
        assertEquals(null, first.getNumberOfSeason());
    }
}

View File

@ -20,7 +20,7 @@ public class SubsceneSubtitleClientTest {
private static SubsceneSearchResult testResult;
private static SubsceneSearchResult manySubtitlesTestResult;
private SubsceneSubtitleClient client = new SubsceneSubtitleClient();
private SubsceneSubtitleClient subscene = new SubsceneSubtitleClient();
@BeforeClass
@ -32,7 +32,7 @@ public class SubsceneSubtitleClientTest {
@Test
public void search() throws Exception {
List<SearchResult> results = client.search("twin peaks");
List<SearchResult> results = subscene.search("twin peaks");
SubsceneSearchResult result = (SubsceneSearchResult) results.get(1);
@ -42,9 +42,23 @@ public class SubsceneSubtitleClientTest {
}
@Test
public void searchResultPageRedirect() throws Exception {
List<SearchResult> results = subscene.search("firefly");
assertEquals(1, results.size());
SubsceneSearchResult result = (SubsceneSearchResult) results.get(0);
assertEquals("Firefly - The Complete Series", result.getName());
assertEquals("http://subscene.com/Firefly-The-Complete-Series/subtitles-20008.aspx", result.getURL().toString());
assertEquals(15, result.getSubtitleCount());
}
@Test
public void getSubtitleListSearchResult() throws Exception {
List<SubtitleDescriptor> subtitleList = client.getSubtitleList(testResult, Locale.ITALIAN);
List<SubtitleDescriptor> subtitleList = subscene.getSubtitleList(testResult, Locale.ITALIAN);
assertEquals(1, subtitleList.size());
@ -58,7 +72,7 @@ public class SubsceneSubtitleClientTest {
@Test
public void getSubtitleListSearchResultMany() throws Exception {
List<SubtitleDescriptor> subtitleList = client.getSubtitleList(manySubtitlesTestResult, LanguageResolver.getDefault().getLocale("Vietnamese"));
List<SubtitleDescriptor> subtitleList = subscene.getSubtitleList(manySubtitlesTestResult, LanguageResolver.getDefault().getLocale("Vietnamese"));
assertEquals(1, subtitleList.size());
}
@ -66,7 +80,7 @@ public class SubsceneSubtitleClientTest {
@Test
public void getSubtitleListLink() throws Exception {
assertEquals(testResult.getURL().toString(), client.getSubtitleListLink(testResult).toURL().toString());
assertEquals(testResult.getURL().toString(), subscene.getSubtitleListLink(testResult).toURL().toString());
}
}

View File

@ -3,6 +3,7 @@ package net.sourceforge.filebot.web;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.net.URL;
import java.util.List;
@ -88,7 +89,7 @@ public class TVDotComClientTest {
public void getEpisodeListAllManySeasons() throws Exception {
List<Episode> list = tvdotcom.getEpisodeList(manySeasonsTestResult);
assertEquals(708, list.size());
assertTrue(list.size() > 700);
}

View File

@ -11,7 +11,7 @@ import org.junit.runners.Suite.SuiteClasses;
@RunWith(Suite.class)
@SuiteClasses( { TVDotComClientTest.class, TVRageClientTest.class })
@SuiteClasses( { TVDotComClientTest.class, AnidbClientTest.class, TVRageClientTest.class, SubsceneSubtitleClientTest.class })
public class WebTestSuite {
public static Test suite() {