* simplify SubsceneSubtitleClient

* use less xpath to speed things up a little bit
This commit is contained in:
Reinhard Pointner 2009-02-21 12:48:52 +00:00
parent e643466653
commit d2f5c8e572
11 changed files with 145 additions and 184 deletions

View File

@ -59,7 +59,7 @@ class AutoFetchEpisodeListMatcher extends SwingWorker<List<Match<File, Episode>>
Collection<String> names = new SeriesNameMatcher().matchAll(files.toArray(new File[0]));
if (names.isEmpty())
throw new IllegalArgumentException("Cannot auto-detect series name.");
throw new IllegalArgumentException("Cannot determine series name.");
return names;
}

View File

@ -47,6 +47,11 @@ class RenameList<E> extends FileBotList<E> {
}
/**
 * Returns the viewport of {@code listScrollPane}.
 *
 * @return the value of {@code listScrollPane.getViewport()}
 */
public JViewport getViewPort() {
	final JViewport viewport = listScrollPane.getViewport();
	return viewport;
}
@Override
public void setTransferablePolicy(TransferablePolicy transferablePolicy) {
super.setTransferablePolicy(transferablePolicy);
@ -64,11 +69,6 @@ class RenameList<E> extends FileBotList<E> {
return true;
}
public JViewport getViewPort() {
return listScrollPane.getViewport();
}
private final LoadAction loadAction = new LoadAction(null);
private final AbstractAction upAction = new AbstractAction(null, ResourceManager.getIcon("action.up")) {

View File

@ -156,6 +156,7 @@ public class RenamePanel extends FileBotPanel {
@Override
public void propertyChange(PropertyChangeEvent evt) {
	// the new property value is the "loading" flag
	boolean loading = (Boolean) evt.getNewValue();
	
	// keep the action disabled for as long as loading is in progress
	setEnabled(!loading);
}
});
@ -164,10 +165,6 @@ public class RenamePanel extends FileBotPanel {
@Override
public void actionPerformed(ActionEvent evt) {
if (model.files().isEmpty()) {
return;
}
// auto-match in progress
namesList.firePropertyChange(LOADING_PROPERTY, false, true);

View File

@ -4,6 +4,8 @@ package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.getHtmlDocument;
import static net.sourceforge.tuned.XPathUtilities.exists;
import static net.sourceforge.tuned.XPathUtilities.getAttribute;
import static net.sourceforge.tuned.XPathUtilities.getTextContent;
import static net.sourceforge.tuned.XPathUtilities.selectNode;
import static net.sourceforge.tuned.XPathUtilities.selectNodes;
import static net.sourceforge.tuned.XPathUtilities.selectString;
@ -59,13 +61,11 @@ public class AnidbClient implements EpisodeListClient {
for (Node node : nodes) {
Node titleNode = selectNode("./TD[@class='name']/A", node);
String title = selectString(".", titleNode);
String href = selectString("@href", titleNode);
String path = "/perl-bin/" + href;
String title = getTextContent(titleNode);
String href = getAttribute("href", titleNode);
try {
searchResults.add(new HyperLink(title, new URL("http", host, path)));
searchResults.add(new HyperLink(title, new URL("http", host, "/perl-bin/" + href)));
} catch (MalformedURLException e) {
Logger.getLogger("global").log(Level.WARNING, "Invalid href: " + href);
}

View File

@ -3,7 +3,10 @@ package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.getHtmlDocument;
import static net.sourceforge.tuned.XPathUtilities.selectNode;
import static net.sourceforge.tuned.XPathUtilities.getAttribute;
import static net.sourceforge.tuned.XPathUtilities.getChild;
import static net.sourceforge.tuned.XPathUtilities.getChildren;
import static net.sourceforge.tuned.XPathUtilities.getTextContent;
import static net.sourceforge.tuned.XPathUtilities.selectNodes;
import static net.sourceforge.tuned.XPathUtilities.selectString;
@ -14,11 +17,10 @@ import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
@ -38,7 +40,7 @@ public class SubsceneSubtitleClient implements SubtitleClient {
private static final String host = "subscene.com";
private final Map<String, Integer> languageFilterMap = new ConcurrentHashMap<String, Integer>(50);
private final Map<String, Integer> languageFilterMap = new HashMap<String, Integer>(50);
@Override
@ -65,15 +67,11 @@ public class SubsceneSubtitleClient implements SubtitleClient {
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
for (Node node : nodes) {
String title = selectString("text()", node);
String href = selectString("@href", node);
String count = selectString("./DFN", node).replaceAll("\\D+", "");
String title = getTextContent(node);
String href = getAttribute("href", node);
try {
URL subtitleListUrl = new URL("http", host, href);
int subtitleCount = Integer.parseInt(count);
searchResults.add(new SubsceneSearchResult(title, subtitleListUrl, subtitleCount));
searchResults.add(new HyperLink(title, new URL("http", host, href)));
} catch (MalformedURLException e) {
Logger.getLogger("global").log(Level.WARNING, "Invalid href: " + href, e);
}
@ -81,24 +79,16 @@ public class SubsceneSubtitleClient implements SubtitleClient {
// we might have been redirected to the subtitle list
if (searchResults.isEmpty()) {
int subtitleNodeCount = getSubtitleNodes(dom).size();
// check if document is a subtitle list
if (subtitleNodeCount > 0) {
try {
// get name of current search result
String name = selectString("id('leftWrapperWide')//H1/text()", dom);
// get current location
String file = selectString("id('aspnetForm')/@action", dom);
try {
URL url = new URL("http", host, file);
searchResults.add(new SubsceneSearchResult(name, url, subtitleNodeCount));
} catch (MalformedURLException e) {
Logger.getLogger("global").log(Level.WARNING, "Invalid location: " + file, e);
}
searchResults.add(new HyperLink(name, new URL("http", host, file)));
} catch (Exception e) {
Logger.getLogger("global").log(Level.WARNING, "Cannot parse subtitle page: " + searchUrl, e);
}
}
@ -106,116 +96,49 @@ public class SubsceneSubtitleClient implements SubtitleClient {
}
private void updateLanguageFilterMap(Document subtitleListDocument) {
List<Node> nodes = selectNodes("//DIV[@class='languageList']/DIV", subtitleListDocument);
for (Node node : nodes) {
String onClick = selectString("./INPUT/@onclick", node);
String filter = new Scanner(onClick).findInLine("\\d+");
if (filter != null) {
String name = selectString("./LABEL/text()", node);
languageFilterMap.put(name.toLowerCase(), Integer.valueOf(filter));
}
}
}
private Integer getLanguageFilter(String languageName) {
if (languageName == null)
return null;
return languageFilterMap.get(languageName.toLowerCase());
}
private String getLanguageName(Locale language) {
if (language == null || language == Locale.ROOT)
return null;
return language.getDisplayLanguage(Locale.ENGLISH);
}
@Override
public List<SubtitleDescriptor> getSubtitleList(SearchResult searchResult, Locale language) throws Exception {
URL subtitleListUrl = getSubtitleListLink(searchResult, language).toURL();
String languageName = getLanguageName(language);
Integer languageFilter = getLanguageFilter(languageName);
boolean reloadFilteredDocument = (languageFilter == null && useFilteredDocument(searchResult));
boolean forceReload = false;
// english language name or null
String languageName = (language == null || language.equals(Locale.ROOT) ? null : language.getDisplayLanguage(Locale.ENGLISH));
Integer languageFilter = null;
if (reloadFilteredDocument && languageFilterMap.isEmpty()) {
// we don't know the filter values yet, so we request a document with an invalid filter,
// that will return a subtitle document very fast
languageFilter = -1;
forceReload = true;
if (languageName != null) {
synchronized (languageFilterMap) {
languageFilter = languageFilterMap.get(languageName.toLowerCase());
}
}
Document subtitleListDocument = getSubtitleListDocument(subtitleListUrl, languageFilter);
// let's update language filters if they are not known yet
if (languageFilterMap.isEmpty()) {
updateLanguageFilterMap(subtitleListDocument);
}
// check if document is already filtered and if requesting a filtered document
// will result in a performance gain (Note: XPath can be very slow)
if (reloadFilteredDocument) {
languageFilter = getLanguageFilter(languageName);
// if language filter has become available, request a filtered document, or if first request was a dummy request
if (languageFilter != null || forceReload) {
subtitleListDocument = getSubtitleListDocument(subtitleListUrl, languageFilter);
synchronized (languageFilterMap) {
languageFilterMap.putAll(getLanguageFilterMap(subtitleListDocument));
}
}
return getSubtitleList(subtitleListUrl, languageName, getSubtitleNodes(subtitleListDocument));
return getSubtitleList(subtitleListUrl, languageName, subtitleListDocument);
}
private boolean useFilteredDocument(SearchResult searchResult) {
return ((SubsceneSearchResult) searchResult).getSubtitleCount() > 50;
}
private Document getSubtitleListDocument(URL subtitleListUrl, Integer languageFilter) throws IOException, SAXException {
URLConnection connection = subtitleListUrl.openConnection();
private List<SubtitleDescriptor> getSubtitleList(URL subtitleListUrl, String languageName, Document subtitleListDocument) {
if (languageFilter != null) {
connection.addRequestProperty("Cookie", "subscene_sLanguageIds=" + languageFilter);
}
return getHtmlDocument(connection);
}
private List<Node> getSubtitleNodes(Document subtitleListDocument) {
return selectNodes("//TABLE[@class='filmSubtitleList']//A[@id]//ancestor::TR", subtitleListDocument);
}
private List<SubtitleDescriptor> getSubtitleList(URL subtitleListUrl, String languageName, List<Node> subtitleNodes) {
List<Node> nodes = selectNodes("//TABLE[@class='filmSubtitleList']//A[@class='a1']", subtitleListDocument);
// match subtitleId and typeId
Pattern hrefPattern = Pattern.compile("javascript:Subtitle\\((\\d+), '(\\w+)', .*");
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(subtitleNodes.size());
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(nodes.size());
for (Node node : subtitleNodes) {
for (Node node : nodes) {
try {
Node linkNode = selectNode("./TD[1]/A", node);
String lang = selectString("./SPAN[1]", linkNode);
String lang = getTextContent(getChildren("SPAN", node).get(0));
if (languageName == null || languageName.equalsIgnoreCase(lang)) {
String href = selectString("@href", linkNode);
String name = selectString("./SPAN[2]", linkNode);
String author = selectString("./TD[4]", node);
String name = getTextContent(getChildren("SPAN", node).get(1));
String href = getAttribute("href", node);
Matcher matcher = hrefPattern.matcher(href);
@ -227,7 +150,7 @@ public class SubsceneSubtitleClient implements SubtitleClient {
URL downloadUrl = getDownloadUrl(subtitleListUrl, subtitleId, typeId);
subtitles.add(new SubsceneSubtitleDescriptor(name, lang, author, typeId, downloadUrl, subtitleListUrl));
subtitles.add(new SubsceneSubtitleDescriptor(name, lang, typeId, downloadUrl, subtitleListUrl));
}
} catch (Exception e) {
Logger.getLogger("global").log(Level.WARNING, "Cannot parse subtitle node", e);
@ -238,7 +161,39 @@ public class SubsceneSubtitleClient implements SubtitleClient {
}
private URL getDownloadUrl(URL referer, String subtitleId, String typeId) throws MalformedURLException {
/**
 * Opens a connection to the given subtitle list url and reads the response via
 * {@code getHtmlDocument}. If a language filter is given, it is sent along as
 * {@code subscene_sLanguageIds} cookie.
 * 
 * @param subtitleListUrl url of the subtitle list page
 * @param languageFilter filter value for the cookie, or null to send no cookie
 * @return the document returned by {@code getHtmlDocument}
 * @throws IOException declared by this method; may be raised while opening the connection
 * @throws SAXException declared by this method
 */
protected Document getSubtitleListDocument(URL subtitleListUrl, Integer languageFilter) throws IOException, SAXException {
	URLConnection connection = subtitleListUrl.openConnection();
	
	boolean filtered = (languageFilter != null);
	
	if (filtered) {
		String cookie = "subscene_sLanguageIds=" + languageFilter;
		connection.addRequestProperty("Cookie", cookie);
	}
	
	Document document = getHtmlDocument(connection);
	return document;
}
/**
 * Reads the language filter values from the language list of a subtitle list document.
 * 
 * Each {@code DIV} below {@code DIV[@class='languageList']} is expected to carry an
 * {@code INPUT} element whose {@code onclick} attribute contains the numeric filter value,
 * and a {@code LABEL} element holding the language name. Entries that lack any of these,
 * or whose {@code onclick} attribute contains no digits at all, are skipped instead of
 * aborting the whole lookup.
 * 
 * @param subtitleListDocument document of a subtitle list page
 * @return map from lower-case language name to filter value, possibly empty, never null
 */
protected Map<String, Integer> getLanguageFilterMap(Document subtitleListDocument) {
	Map<String, Integer> filters = new HashMap<String, Integer>(50);
	
	List<Node> nodes = selectNodes("//DIV[@class='languageList']/DIV", subtitleListDocument);
	
	for (Node node : nodes) {
		Node input = getChild("INPUT", node);
		
		// getAttribute dereferences the attribute node without a check, so make sure it exists first
		if (input == null || input.getAttributes() == null || input.getAttributes().getNamedItem("onclick") == null) {
			continue;
		}
		
		// select INPUT/@onclick, ditch non-number-characters
		String filter = getAttribute("onclick", input).replaceAll("\\D+", "");
		
		// replaceAll never returns null; an onclick value without digits results in an empty string
		if (filter.length() == 0) {
			continue;
		}
		
		// select LABEL/text(), which is null if there is no LABEL child
		String name = getTextContent("LABEL", node);
		
		if (name != null) {
			filters.put(name.toLowerCase(), Integer.valueOf(filter));
		}
	}
	
	return filters;
}
protected URL getDownloadUrl(URL referer, String subtitleId, String typeId) throws MalformedURLException {
String basePath = FileUtilities.getNameWithoutExtension(referer.getFile());
String path = String.format("%s-dlpath-%s/%s.zipx", basePath, subtitleId, typeId);
@ -251,22 +206,4 @@ public class SubsceneSubtitleClient implements SubtitleClient {
return ((HyperLink) searchResult).toURI();
}
protected static class SubsceneSearchResult extends HyperLink {
private final int subtitleCount;
public SubsceneSearchResult(String name, URL url, int subtitleCount) {
super(name, url);
this.subtitleCount = subtitleCount;
}
public int getSubtitleCount() {
return subtitleCount;
}
}
}

View File

@ -12,7 +12,6 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
private final String title;
private final String language;
private final String author;
private final String typeId;
@ -20,10 +19,9 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
private final URL referer;
public SubsceneSubtitleDescriptor(String title, String language, String author, String typeId, URL downloadUrl, URL referer) {
public SubsceneSubtitleDescriptor(String title, String language, String typeId, URL downloadUrl, URL referer) {
this.title = title;
this.language = language;
this.author = author;
this.typeId = typeId;
@ -43,11 +41,6 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
}
public String getAuthor() {
return author;
}
@Override
public DownloadTask createDownloadTask() {
DownloadTask downloadTask = new DownloadTask(downloadUrl);

View File

@ -3,6 +3,8 @@ package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.getHtmlDocument;
import static net.sourceforge.tuned.XPathUtilities.getAttribute;
import static net.sourceforge.tuned.XPathUtilities.getTextContent;
import static net.sourceforge.tuned.XPathUtilities.selectNodes;
import static net.sourceforge.tuned.XPathUtilities.selectString;
@ -64,8 +66,8 @@ public class TVDotComClient implements EpisodeListClient {
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
for (Node node : nodes) {
String title = node.getTextContent();
String href = selectString("@href", node);
String title = getTextContent(node);
String href = getAttribute("href", node);
try {
URL episodeListingUrl = new URL(href.replaceFirst("summary.html\\?.*", "episode_listings.html"));

View File

@ -4,7 +4,6 @@ package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.getDocument;
import static net.sourceforge.tuned.XPathUtilities.getTextContent;
import static net.sourceforge.tuned.XPathUtilities.selectInteger;
import static net.sourceforge.tuned.XPathUtilities.selectNodes;
import static net.sourceforge.tuned.XPathUtilities.selectString;
@ -67,9 +66,9 @@ public class TVRageClient implements EpisodeListClient {
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
for (Node node : nodes) {
int showid = selectInteger("showid", node);
String name = selectString("name", node);
String link = selectString("link", node);
int showid = Integer.parseInt(getTextContent("showid", node));
String name = getTextContent("name", node);
String link = getTextContent("link", node);
searchResults.add(new TVRageSearchResult(name, showid, link));
}

View File

@ -4,7 +4,6 @@ package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.getDocument;
import static net.sourceforge.tuned.XPathUtilities.getTextContent;
import static net.sourceforge.tuned.XPathUtilities.selectInteger;
import static net.sourceforge.tuned.XPathUtilities.selectNodes;
import static net.sourceforge.tuned.XPathUtilities.selectString;
@ -94,8 +93,8 @@ public class TheTVDBClient implements EpisodeListClient {
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
for (Node node : nodes) {
int seriesId = selectInteger("seriesid", node);
String seriesName = selectString("SeriesName", node);
int seriesId = Integer.parseInt(getTextContent("seriesid", node));
String seriesName = getTextContent("SeriesName", node);
searchResults.add(new TheTVDBSearchResult(seriesName, seriesId));
}
@ -225,7 +224,7 @@ public class TheTVDBClient implements EpisodeListClient {
// get episode xml from first episode of given season
Document dom = getDocument(new URL("http", host, "/api/" + apikey + "/series/" + seriesId + "/default/" + season + "/1/en.xml"));
seasonId = selectInteger("Data/Episode/seasonid", dom);
seasonId = Integer.valueOf(selectString("Data/Episode/seasonid", dom));
cache.putSeasonId(seriesId, season, seasonId);
}
@ -261,8 +260,8 @@ public class TheTVDBClient implements EpisodeListClient {
// traverse all mirrors
for (Node node : selectNodes("Mirrors/Mirror", dom)) {
// mirror data
String mirror = selectString("mirrorpath", node);
int typeMask = selectInteger("typemask", node);
String mirror = getTextContent("mirrorpath", node);
int typeMask = Integer.parseInt(getTextContent("typemask", node));
// add mirror to the according type lists
for (MirrorType type : MirrorType.fromTypeMask(typeMask)) {

View File

@ -3,6 +3,7 @@ package net.sourceforge.tuned;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
import javax.xml.xpath.XPathConstants;
@ -43,6 +44,11 @@ public final class XPathUtilities {
}
/**
 * Checks whether {@code selectNode} finds a node for the given xpath expression.
 * 
 * @param xpath xpath expression passed on to {@code selectNode}
 * @param node context object passed on to {@code selectNode}
 * @return true if {@code selectNode} returned a non-null result, false otherwise
 */
public static boolean exists(String xpath, Object node) {
	Object match = selectNode(xpath, node);
	return match != null;
}
/**
* @param nodeName search for nodes with this name
* @param parentNode search in the child nodes of this node
@ -58,32 +64,51 @@ public final class XPathUtilities {
}
/**
 * Collects the child nodes of the given parent whose node name equals the given name.
 * Only the parent's own child node list is examined, wrapped in a {@code NodeListDecorator}
 * for iteration.
 * 
 * @param nodeName node name to compare against
 * @param parentNode node whose child nodes are examined
 * @return a new list holding the matching child nodes, empty if none matched
 */
public static List<Node> getChildren(String nodeName, Node parentNode) {
	List<Node> matches = new ArrayList<Node>();
	
	for (Node candidate : new NodeListDecorator(parentNode.getChildNodes())) {
		if (!nodeName.equals(candidate.getNodeName())) {
			continue;
		}
		
		matches.add(candidate);
	}
	
	return matches;
}
/**
 * Reads the value of the named attribute of the given node, with leading and trailing
 * whitespace removed.
 * 
 * Note that no null checks are performed: if the node has no attribute map or no attribute
 * of that name, a {@link NullPointerException} is the result.
 * 
 * @param attribute name of the attribute
 * @param node node that carries the attribute
 * @return trimmed attribute value
 */
public static String getAttribute(String attribute, Node node) {
	Node attributeNode = node.getAttributes().getNamedItem(attribute);
	String value = attributeNode.getNodeValue();
	
	return value.trim();
}
/**
* Get text content of the first child node matching the given node name. Use this method
* instead of {@link #selectString(String, Object)} whenever xpath support is not required,
* because it is much faster, especially for large documents.
*
* @param nodeName search for nodes with this name
* @param childName search for nodes with this name
* @param parentNode search in the child nodes of this node
* @return text content of the child node or null if no child with the given name was found
*/
public static String getTextContent(String nodeName, Node parentNode) {
Node child = getChild(nodeName, parentNode);
public static String getTextContent(String childName, Node parentNode) {
Node child = getChild(childName, parentNode);
if (child == null)
if (child == null) {
return null;
}
return child.getTextContent();
return getTextContent(child);
}
public static int selectInteger(String xpath, Object node) {
return Integer.parseInt(selectString(xpath, node));
}
public static boolean exists(String xpath, Object node) {
return selectNode(xpath, node) != null;
/**
 * Builds the text content of a node from the {@code #text} nodes that {@code getChildren}
 * returns for it. The node values are concatenated in list order and the result is trimmed.
 * 
 * @param node node whose text is read
 * @return trimmed concatenation of the text node values, empty if there are none
 */
public static String getTextContent(Node node) {
	StringBuilder text = new StringBuilder();
	
	List<Node> textNodes = getChildren("#text", node);
	
	for (Node textNode : textNodes) {
		text.append(textNode.getNodeValue());
	}
	
	String content = text.toString();
	return content.trim();
}

View File

@ -7,9 +7,9 @@ import static org.junit.Assert.assertEquals;
import java.net.URL;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import net.sourceforge.filebot.ui.panel.subtitle.LanguageResolver;
import net.sourceforge.filebot.web.SubsceneSubtitleClient.SubsceneSearchResult;
import org.junit.BeforeClass;
import org.junit.Test;
@ -20,18 +20,18 @@ public class SubsceneSubtitleClientTest {
/**
* Twin Peaks - First Season, ~ 15 subtitles
*/
private static SubsceneSearchResult twinpeaksSearchResult;
private static HyperLink twinpeaksSearchResult;
/**
* Lost - Fourth Season, ~ 430 subtitles
*/
private static SubsceneSearchResult lostSearchResult;
private static HyperLink lostSearchResult;
@BeforeClass
public static void setUpBeforeClass() throws Exception {
twinpeaksSearchResult = new SubsceneSearchResult("Twin Peaks - First Season (1990)", new URL("http://subscene.com/twin-peaks--first-season/subtitles-32482.aspx"), 18);
lostSearchResult = new SubsceneSearchResult("Lost - Fourth Season (2008)", new URL("http://subscene.com/Lost-Fourth-Season/subtitles-70963.aspx"), 420);
twinpeaksSearchResult = new HyperLink("Twin Peaks - First Season (1990)", new URL("http://subscene.com/twin-peaks--first-season/subtitles-32482.aspx"));
lostSearchResult = new HyperLink("Lost - Fourth Season (2008)", new URL("http://subscene.com/Lost-Fourth-Season/subtitles-70963.aspx"));
}
private SubsceneSubtitleClient subscene = new SubsceneSubtitleClient();
@ -41,11 +41,10 @@ public class SubsceneSubtitleClientTest {
public void search() throws Exception {
List<SearchResult> results = subscene.search("twin peaks");
SubsceneSearchResult result = (SubsceneSearchResult) results.get(1);
HyperLink result = (HyperLink) results.get(1);
assertEquals(twinpeaksSearchResult.getName(), result.getName());
assertEquals(twinpeaksSearchResult.getURL().toString(), result.getURL().toString());
assertEquals(twinpeaksSearchResult.getSubtitleCount(), result.getSubtitleCount());
}
@ -55,11 +54,10 @@ public class SubsceneSubtitleClientTest {
assertEquals(1, results.size());
SubsceneSearchResult result = (SubsceneSearchResult) results.get(0);
HyperLink result = (HyperLink) results.get(0);
assertEquals("Firefly - The Complete Series", result.getName());
assertEquals("http://subscene.com/Firefly-The-Complete-Series/subtitles-20008.aspx", result.getURL().toString());
assertEquals(16, result.getSubtitleCount());
}
@ -86,6 +84,17 @@ public class SubsceneSubtitleClientTest {
}
/**
 * Requests a subtitle list page and checks a few of the language filter values parsed from it.
 */
@Test
public void getLanguageFilterMap() throws Exception {
	Map<String, Integer> filters = subscene.getLanguageFilterMap(subscene.getSubtitleListDocument(new URL("http://subscene.com/none/subtitles-0.aspx"), null));
	
	// compare Integer with Integer, so that assertEquals(Object, Object) is selected unambiguously
	// and a missing key shows up as assertion failure (plain decimal literals, no octal "01")
	assertEquals(Integer.valueOf(1), filters.get("albanian"));
	assertEquals(Integer.valueOf(13), filters.get("english"));
	assertEquals(Integer.valueOf(17), filters.get("finnish"));
	assertEquals(Integer.valueOf(45), filters.get("vietnamese"));
}
@Test
public void getSubtitleListLink() throws Exception {
assertEquals(twinpeaksSearchResult.getURL().toString(), subscene.getSubtitleListLink(twinpeaksSearchResult, null).toURL().toString());