* fix imdb scraper issues

This commit is contained in:
Reinhard Pointner 2013-01-13 03:15:05 +00:00
parent f7f41509e1
commit 5fef14bedd
2 changed files with 12 additions and 1 deletions

View File

@@ -85,7 +85,7 @@ public class IMDbClient implements MovieIdentificationService {
if (name.startsWith("\""))
continue;
String year = node.getNextSibling().getTextContent().replaceAll("[\\p{Punct}\\p{Space}]+", ""); // remove non-number characters
String year = node.getNextSibling().getTextContent().trim().replaceFirst("^\\(I\\)", "").replaceAll("[\\p{Punct}\\p{Space}]+", ""); // remove non-number characters
String href = getAttribute("href", node);
results.add(new Movie(name, Integer.parseInt(year), getImdbId(href), -1));

View File

@@ -70,6 +70,17 @@ public class IMDbClientTest {
}
// Searches for "Drive 2011" and verifies name, year and IMDb id of the first result.
// NOTE(review): presumably guards the case where the year text is preceded by a "(I)" marker — confirm against the scraper change.
@Test
public void searchMovie6() throws Exception {
	// only the first entry of the returned list is inspected
	Movie firstHit = imdb.searchMovie("Drive 2011", null).get(0);

	assertEquals("Drive", firstHit.getName());
	assertEquals(2011, firstHit.getYear());
	// third argument is presumably the comparison delta; 0 would mean the id must match exactly — TODO confirm
	assertEquals(780504, firstHit.getImdbId(), 0);
}
@Test
public void searchMovieRedirect() throws Exception {
List<Movie> results = imdb.searchMovie("(500) Days of Summer (2009)", null);