* heavily improved TV.com Client (better results, 2x - 4x faster)

* improved EpisodeListClient API
* unit tests
This commit is contained in:
Reinhard Pointner 2008-07-06 03:17:23 +00:00
parent 1cab55e38c
commit cd09a67c5e
8 changed files with 268 additions and 79 deletions

View File

@ -6,10 +6,12 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
@ -32,7 +34,7 @@ public class AnidbClient extends EpisodeListClient {
public AnidbClient() {
super("AniDB", ResourceManager.getIcon("search.anidb"), false);
super("AniDB", ResourceManager.getIcon("search.anidb"));
};
@ -48,24 +50,24 @@ public class AnidbClient extends EpisodeListClient {
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
if (!nodes.isEmpty())
if (!nodes.isEmpty()) {
for (Node node : nodes) {
Node titleNode = XPathUtil.selectNode("./TD[@class='name']/A", node);
String title = XPathUtil.selectString(".", titleNode);
String href = XPathUtil.selectString("@href", titleNode);
String file = "/perl-bin/" + href;
String path = "/perl-bin/" + href;
try {
URL url = new URL("http", host, file);
URI animeUrl = new URI("http", host, path, null);
searchResults.add(new HyperLink(title, url));
} catch (MalformedURLException e) {
searchResults.add(new HyperLink(title, animeUrl));
} catch (URISyntaxException e) {
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid href: " + href);
}
}
else {
} else {
// we might have been redirected to the episode list page directly
List<Node> list = XPathUtil.selectNodes("//TABLE[@class='eplist']", dom);
@ -74,7 +76,7 @@ public class AnidbClient extends EpisodeListClient {
String header = XPathUtil.selectString("id('layout-content')//H1[1]", dom);
String title = header.replaceFirst("Anime:\\s*", "");
searchResults.add(new HyperLink(title, getSearchUrl(searchterm)));
searchResults.add(new HyperLink(title, URI.create(getSearchUrl(searchterm).toString())));
}
}
@ -85,9 +87,9 @@ public class AnidbClient extends EpisodeListClient {
@Override
public List<Episode> getEpisodeList(SearchResult searchResult, int season) throws IOException, SAXException {
public List<Episode> getEpisodeList(SearchResult searchResult) throws IOException, SAXException {
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListLink(searchResult, season));
Document dom = HtmlUtil.getHtmlDocument(getEpisodeListLink(searchResult));
List<Node> nodes = XPathUtil.selectNodes("id('eplist')//TR/TD/SPAN/ancestor::TR", dom);
@ -119,9 +121,27 @@ public class AnidbClient extends EpisodeListClient {
}
@Override
public URI getEpisodeListLink(SearchResult searchResult) {
return ((HyperLink) searchResult).getURI();
}
@Override
public boolean hasSingleSeasonSupport() {
return false;
}
@Override
public Collection<Episode> getEpisodeList(SearchResult searchResult, int season) throws Exception {
throw new UnsupportedOperationException();
}
@Override
public URI getEpisodeListLink(SearchResult searchResult, int season) {
return ((HyperLink) searchResult).toUri();
throw new UnsupportedOperationException();
}
@ -129,9 +149,9 @@ public class AnidbClient extends EpisodeListClient {
String qs = URLEncoder.encode(searchterm, "UTF-8");
// type=2 -> only TV Series
String file = "/perl-bin/animedb.pl?type=2&show=animelist&orderby.name=0.1&orderbar=0&noalias=1&do.search=Search&adb.search=" + qs;
String path = "/perl-bin/animedb.pl?type=2&show=animelist&orderby.name=0.1&orderbar=0&noalias=1&do.search=Search&adb.search=" + qs;
return new URL("http", host, file);
return new URL("http", host, path);
}
}

View File

@ -8,7 +8,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.List;
import javax.swing.ImageIcon;
import javax.swing.Icon;
public abstract class EpisodeListClient {
@ -20,7 +20,7 @@ public abstract class EpisodeListClient {
if (registry == null) {
registry = new ArrayList<EpisodeListClient>(3);
registry.add(new TvdotcomClient());
registry.add(new TVDotComClient());
registry.add(new AnidbClient());
registry.add(new TVRageClient());
}
@ -29,41 +29,43 @@ public abstract class EpisodeListClient {
}
private final String name;
private final boolean singleSeasonSupported;
private final ImageIcon icon;
private final Icon icon;
public EpisodeListClient(String name, ImageIcon icon, boolean singleSeasonSupported) {
public EpisodeListClient(String name, Icon icon) {
this.name = name;
this.icon = icon;
this.singleSeasonSupported = singleSeasonSupported;
}
public abstract Collection<SearchResult> search(String searchterm) throws Exception;
public abstract boolean hasSingleSeasonSupport();
public abstract Collection<Episode> getEpisodeList(SearchResult searchResult) throws Exception;
public abstract Collection<Episode> getEpisodeList(SearchResult searchResult, int season) throws Exception;
public abstract URI getEpisodeListLink(SearchResult searchResult);
public abstract URI getEpisodeListLink(SearchResult searchResult, int season);
public boolean isSingleSeasonSupported() {
return singleSeasonSupported;
}
public ImageIcon getIcon() {
return icon;
}
public String getName() {
return name;
}
public Icon getIcon() {
return icon;
}
@Override
public String toString() {
return name;

View File

@ -3,27 +3,27 @@ package net.sourceforge.filebot.web;
import java.net.URI;
import java.net.URL;
import java.net.URISyntaxException;
public class HyperLink extends SearchResult {
private final URL url;
private final URI uri;
public HyperLink(String name, URL url) {
public HyperLink(String name, URI uri) {
super(name);
this.url = url;
this.uri = uri;
}
public URL getUrl() {
return url;
public HyperLink(String name, String uri) throws URISyntaxException {
this(name, new URI(uri));
}
public URI toUri() {
return URI.create(url.toString());
public URI getURI() {
return uri;
}
}

View File

@ -6,6 +6,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
@ -57,10 +58,11 @@ public class SubsceneSubtitleClient extends SubtitleClient {
String href = XPathUtil.selectString("@href", node);
try {
URL url = new URL("http", host, href);
//TODO which exception?
URI url = new URI("http", host, href);
searchResults.add(new HyperLink(title, url));
} catch (MalformedURLException e) {
} catch (URISyntaxException e) {
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid href: " + href, e);
}
}
@ -168,7 +170,7 @@ public class SubsceneSubtitleClient extends SubtitleClient {
@Override
public URI getSubtitleListLink(SearchResult searchResult) {
return ((HyperLink) searchResult).toUri();
return ((HyperLink) searchResult).getURI();
}

View File

@ -28,7 +28,13 @@ public class TVRageClient extends EpisodeListClient {
public TVRageClient() {
super("TVRage", ResourceManager.getIcon("search.tvrage"), true);
super("TVRage", ResourceManager.getIcon("search.tvrage"));
}
@Override
public boolean hasSingleSeasonSupport() {
return true;
}
@ -60,53 +66,48 @@ public class TVRageClient extends EpisodeListClient {
}
@Override
public List<Episode> getEpisodeList(SearchResult searchResult, int season) throws IOException, SAXException, ParserConfigurationException {
private EpisodeListFeed getEpisodeListFeed(SearchResult searchResult) throws SAXException, IOException, ParserConfigurationException {
int showId = ((TVRageSearchResult) searchResult).getShowId();
String episodeListUri = String.format("http://" + host + "/feeds/episode_list.php?sid=" + showId);
Document dom = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(episodeListUri);
int numberOfSeasons = XPathUtil.selectInteger("Show/totalseasons", dom);
if (season > numberOfSeasons)
throw new IllegalArgumentException(String.format("%s only has %d seasons", searchResult.getName(), numberOfSeasons));
Node episodeListNode = XPathUtil.selectNode("Show/Episodelist", dom);
boolean allSeasons = (season == 0);
List<Episode> episodes = new ArrayList<Episode>(24);
for (int i = 0; i <= numberOfSeasons; i++) {
if (i == season || allSeasons) {
List<Node> nodes = XPathUtil.selectNodes("Season" + i + "/episode", episodeListNode);
for (Node node : nodes) {
String title = XPathUtil.selectString("title", node);
String episodeNumber = XPathUtil.selectString("seasonnum", node);
String seasonNumber = Integer.toString(i);
episodes.add(new Episode(searchResult.getName(), seasonNumber, episodeNumber, title));
}
}
}
return episodes;
return new EpisodeListFeed(dom);
}
@Override
public List<Episode> getEpisodeList(SearchResult searchResult) throws Exception {
return getEpisodeListFeed(searchResult).getEpisodeList();
}
@Override
public List<Episode> getEpisodeList(SearchResult searchResult, int season) throws IOException, SAXException, ParserConfigurationException {
return getEpisodeListFeed(searchResult).getEpisodeList(season);
}
@Override
public URI getEpisodeListLink(SearchResult searchResult) {
return getEpisodeListLink(searchResult, "all");
}
@Override
public URI getEpisodeListLink(SearchResult searchResult, int season) {
String page = ((TVRageSearchResult) searchResult).getLink();
String seasonString = (season >= 1) ? Integer.toString(season) : "all";
return getEpisodeListLink(searchResult, Integer.toString(season));
}
private URI getEpisodeListLink(SearchResult searchResult, String seasonString) {
String base = ((TVRageSearchResult) searchResult).getLink();
return URI.create(page + "/episode_list/" + seasonString);
return URI.create(base + "/episode_list/" + seasonString);
}
public static class TVRageSearchResult extends SearchResult {
protected static class TVRageSearchResult extends SearchResult {
private final int showId;
private final String link;
@ -130,4 +131,64 @@ public class TVRageClient extends EpisodeListClient {
}
/**
 * Wraps an episode list feed document and exposes its episodes as
 * {@link Episode} objects.
 * <p>
 * The show name, the total number of seasons and the episode list node are
 * read once in the constructor via {@code XPathUtil} (from {@code Show/name},
 * {@code Show/totalseasons} and {@code Show/Episodelist}) and are kept for the
 * lifetime of the instance.
 */
private static class EpisodeListFeed {

    private final String name;
    private final int totalSeasons;
    private final Node episodeListNode;


    /**
     * @param dom the feed document to read the show name, season count and
     *            episode list node from
     */
    public EpisodeListFeed(Document dom) {
        name = XPathUtil.selectString("Show/name", dom);
        totalSeasons = XPathUtil.selectInteger("Show/totalseasons", dom);
        episodeListNode = XPathUtil.selectNode("Show/Episodelist", dom);
    }


    /**
     * @return the show name as read from the feed
     */
    public String getName() {
        return name;
    }


    /**
     * @return the total number of seasons as read from the feed
     */
    public int getTotalSeasons() {
        return totalSeasons;
    }


    /**
     * Collects the episodes of every season from 0 up to and including
     * {@link #getTotalSeasons()}, in ascending season order.
     *
     * @return a new list containing the episodes of all seasons
     */
    public List<Episode> getEpisodeList() {
        List<Episode> episodes = new ArrayList<Episode>(150);

        // season 0 is included on purpose: the per-season lookup accepts it
        for (int season = 0; season <= getTotalSeasons(); season++) {
            episodes.addAll(getEpisodeList(season));
        }

        return episodes;
    }


    /**
     * Collects the episodes listed under the {@code Season<n>/episode} nodes
     * of the given season.
     *
     * @param season season number, from 0 to {@link #getTotalSeasons()} inclusive
     * @return a new list containing the episodes of that season
     * @throws IllegalArgumentException if {@code season} is negative or greater
     *             than {@link #getTotalSeasons()}
     */
    public List<Episode> getEpisodeList(int season) {
        // report a negative season as such instead of claiming the show has too few seasons
        if (season < 0) {
            throw new IllegalArgumentException(String.format("Season number must not be negative: %d", season));
        }

        if (season > getTotalSeasons()) {
            throw new IllegalArgumentException(String.format("%s only has %d seasons", getName(), getTotalSeasons()));
        }

        String seasonString = Integer.toString(season);
        List<Node> nodes = XPathUtil.selectNodes("Season" + seasonString + "/episode", episodeListNode);

        List<Episode> episodes = new ArrayList<Episode>(nodes.size());

        for (Node node : nodes) {
            String title = XPathUtil.selectString("title", node);
            String episodeNumber = XPathUtil.selectString("seasonnum", node);

            episodes.add(new Episode(getName(), seasonString, episodeNumber, title));
        }

        return episodes;
    }
}
}

View File

@ -0,0 +1,97 @@
package net.sourceforge.filebot.web;
import static org.junit.Assert.assertEquals;
import java.net.URI;
import java.util.List;
import org.junit.Test;
/**
 * Tests for {@link TVDotComClient}: search, episode lists (single season and
 * all seasons) and episode list links.
 * <p>
 * NOTE(review): the expected names, titles and episode counts are hard-coded;
 * the client presumably fetches them from tv.com at test time, so these tests
 * likely need network access and may break when the site's data changes -
 * confirm.
 */
public class TVDotComClientTest {

    private static final TVDotComClient tvdotcom = new TVDotComClient();

    private static final HyperLink testResult = new HyperLink("Buffy the Vampire Slayer", URI.create("http://www.tv.com/buffy-the-vampire-slayer/show/10/episode_listings.html"));
    private static final HyperLink singleSeasonTestResult = new HyperLink("Firefly", URI.create("http://www.tv.com/firefly/show/7097/episode_listings.html"));
    private static final HyperLink manySeasonsTestResult = new HyperLink("Doctor Who", URI.create("http://www.tv.com/doctor-who/show/355/episode_listings.html"));


    /** The first search result for "Buffy" must match the reference name and URI. */
    @Test
    public void search() throws Exception {
        List<SearchResult> results = tvdotcom.search("Buffy");

        HyperLink result = (HyperLink) results.get(0);

        assertEquals(testResult.getName(), result.getName());
        assertEquals(testResult.getURI(), result.getURI());
    }


    /** Season 7 is expected to contain 22 episodes, the last one being "Chosen". */
    @Test
    public void getEpisodeList() throws Exception {
        List<Episode> results = tvdotcom.getEpisodeList(testResult, 7);

        assertEquals(22, results.size());

        Episode chosen = results.get(21);

        assertEquals("Buffy the Vampire Slayer", chosen.getShowName());
        assertEquals("Chosen", chosen.getTitle());
        assertEquals("22", chosen.getNumberOfEpisode());
        assertEquals("7", chosen.getNumberOfSeason());
    }


    /** The complete list of a multi-season show is expected to start with the unaired pilot. */
    @Test
    public void getEpisodeListAllMultiSeason() throws Exception {
        List<Episode> list = tvdotcom.getEpisodeList(testResult);

        assertEquals(145, list.size());

        Episode first = list.get(0);

        assertEquals("Buffy the Vampire Slayer", first.getShowName());
        assertEquals("Unaired Pilot", first.getTitle());
        assertEquals("Pilot", first.getNumberOfEpisode());
        assertEquals("1", first.getNumberOfSeason());
    }


    /** The complete list of a show with only one season. */
    @Test
    public void getEpisodeListAllSingleSeason() throws Exception {
        List<Episode> list = tvdotcom.getEpisodeList(singleSeasonTestResult);

        assertEquals(15, list.size());

        Episode fourth = list.get(3);

        assertEquals("Firefly", fourth.getShowName());
        assertEquals("Jaynestown", fourth.getTitle());
        assertEquals("04", fourth.getNumberOfEpisode());
        assertEquals("1", fourth.getNumberOfSeason());
    }


    /** The complete list of a show with many seasons; only the episode count is checked. */
    @Test
    public void getEpisodeListAllManySeasons() throws Exception {
        List<Episode> list = tvdotcom.getEpisodeList(manySeasonsTestResult);

        assertEquals(708, list.size());
    }


    /** The link for a specific season carries that season as query parameter. */
    @Test
    public void getEpisodeListLink() {
        // JUnit expects (expected, actual); keeping that order makes failure messages read correctly
        assertEquals("http://www.tv.com/buffy-the-vampire-slayer/show/10/episode_listings.html?season=1", tvdotcom.getEpisodeListLink(testResult, 1).toString());
    }


    /** The link for season 0 carries {@code season=0} as query parameter. */
    @Test
    public void getEpisodeListLinkAll() {
        assertEquals("http://www.tv.com/buffy-the-vampire-slayer/show/10/episode_listings.html?season=0", tvdotcom.getEpisodeListLink(testResult, 0).toString());
    }
}

View File

@ -13,8 +13,8 @@ import org.junit.Test;
public class TVRageClientTest {
private TVRageClient tvrage = new TVRageClient();
private TVRageSearchResult testResult = new TVRageSearchResult("Buffy the Vampire Slayer", 2930, "http://www.tvrage.com/Buffy_The_Vampire_Slayer");
private static TVRageClient tvrage = new TVRageClient();
private static TVRageSearchResult testResult = new TVRageSearchResult("Buffy the Vampire Slayer", 2930, "http://www.tvrage.com/Buffy_The_Vampire_Slayer");
@Test
@ -33,6 +33,8 @@ public class TVRageClientTest {
public void getEpisodeList() throws Exception {
List<Episode> list = tvrage.getEpisodeList(testResult, 7);
assertEquals(22, list.size());
Episode chosen = list.get(21);
assertEquals("Buffy the Vampire Slayer", chosen.getShowName());
@ -44,7 +46,7 @@ public class TVRageClientTest {
@Test
public void getEpisodeListAll() throws Exception {
List<Episode> list = tvrage.getEpisodeList(testResult, 0);
List<Episode> list = tvrage.getEpisodeList(testResult);
assertEquals(145, list.size());
@ -65,8 +67,13 @@ public class TVRageClientTest {
@Test
public void getEpisodeListLink() throws Exception {
assertEquals(tvrage.getEpisodeListLink(testResult, 0).toString(), "http://www.tvrage.com/Buffy_The_Vampire_Slayer/episode_list/all");
assertEquals(tvrage.getEpisodeListLink(testResult, 1).toString(), "http://www.tvrage.com/Buffy_The_Vampire_Slayer/episode_list/1");
}
@Test
public void getEpisodeListLinkAll() throws Exception {
assertEquals(tvrage.getEpisodeListLink(testResult).toString(), "http://www.tvrage.com/Buffy_The_Vampire_Slayer/episode_list/all");
}
}

View File

@ -11,7 +11,7 @@ import org.junit.runners.Suite.SuiteClasses;
@RunWith(Suite.class)
@SuiteClasses( { TVRageClientTest.class })
@SuiteClasses( { TVDotComClientTest.class, TVRageClientTest.class })
public class WebTestSuite {
public static Test suite() {