* implement guessMovie feature

This commit is contained in:
Reinhard Pointner 2015-05-11 13:57:04 +00:00
parent d1c6ef75a8
commit e55526c3ac
9 changed files with 140 additions and 39 deletions

View File

@ -4,7 +4,8 @@ import org.tukaani.xz.*
/* ------------------------------------------------------------------------- */
def dir_website = "../website"
def dir_root = ".."
def dir_website = "${dir_root}/website"
def dir_data = "${dir_website}/data"
def sortRegexList(path) {
@ -29,7 +30,7 @@ sortRegexList("${dir_data}/add-series-alias.txt")
def reviews = []
new File('reviews.csv').eachLine('UTF-8'){
new File("${dir_root}/reviews.csv").eachLine('UTF-8'){
def s = it.split(';', 3)
reviews << [user: s[0], date: s[1], text: s[2].replaceAll(/^\"|\"$/, '').replaceAll(/["]{2}/, '"') ]
}
@ -58,7 +59,7 @@ def pack(file, lines) {
}
def rows = lines.size()
def columns = lines.collect{ it.split(/\t/).length }.max()
println "$file ($rows rows, $columns columns)"
println "${file.canonicalFile} ($rows rows, $columns columns)"
}
@ -119,7 +120,7 @@ new File('osdb.txt').eachLine('UTF-8'){
// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
if (fields.size() == 6 && fields[1] ==~ /\d+/ && fields[3] ==~ /\d{4}/) {
if (fields[4] ==~ /movie|tv.series/ && isValidMovieName(fields[2]) && (fields[3] as int) >= 1970 && (fields[5] as int) >= 100) {
osdb << [fields[1] as int, fields[2], fields[3] as int, fields[4] == /movie/ ? 'm' : fields[4] == /movie/ ? 's' : '?', fields[5] as int]
osdb << [fields[1] as int, fields[2], fields[3] as int, fields[4] == /movie/ ? 'm' : fields[4] == /tv series/ ? 's' : '?', fields[5] as int]
}
}
}
@ -137,7 +138,7 @@ pack(osdb_out, osdb*.join('\t'))
// BUILD moviedb index
def omdb = []
new File('omdb.txt').eachLine('Windows-1252'){
new File('omdbMovies.txt').eachLine('Windows-1252'){
def line = it.split(/\t/)
if (line.length > 11 && line[0] ==~ /\d+/ && line[3] ==~ /\d{4}/) {
def imdbid = line[1].substring(2).toInteger()

View File

@ -456,7 +456,7 @@ public class ReleaseInfo {
int imdbid = parseInt(row[0]);
String name = row[1];
int year = parseInt(row[2]);
char kind = row[3].charAt(0);
String kind = row[3];
int score = parseInt(row[4]);
result.add(new SubtitleSearchResult(imdbid, name, year, kind, score));
}

View File

@ -17,7 +17,6 @@ import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
@ -66,6 +65,7 @@ public final class SubtitleUtilities {
throw new InterruptedException();
// auto-detect query and search for subtitles
Collection<SubtitleSearchResult> guessSet = new LinkedHashSet<SubtitleSearchResult>();
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
List<File> files = bySeries.getValue();
@ -104,11 +104,11 @@ public final class SubtitleUtilities {
}
}
if (!searchByMovie && !searchBySeries)
if (!searchByMovie && !searchBySeries && guessSet.isEmpty())
continue;
// search for subtitles online using the auto-detected or forced query information
Set<SubtitleDescriptor> subtitles = findSubtitles(service, querySet, searchByMovie, searchBySeries, languageName);
Set<SubtitleDescriptor> subtitles = findSubtitles(service, guessSet, querySet, searchByMovie, searchBySeries, languageName);
// allow early abort
if (Thread.interrupted())
@ -191,18 +191,24 @@ public final class SubtitleUtilities {
return subtitleByVideo;
}
public static Set<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, boolean searchByMovie, boolean searchBySeries, String languageName) throws Exception {
public static Set<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<SubtitleSearchResult> guessSet, Collection<String> querySet, boolean searchByMovie, boolean searchBySeries, String languageName) throws Exception {
Set<SubtitleDescriptor> subtitles = new LinkedHashSet<SubtitleDescriptor>();
// search for and automatically select movie / show entry
Set<SubtitleSearchResult> resultSet = new HashSet<SubtitleSearchResult>();
Set<SubtitleSearchResult> resultSet = new LinkedHashSet<SubtitleSearchResult>();
// add known results first
resultSet.addAll(guessSet);
resultSet.addAll(findProbableSearchResults(service, querySet, searchByMovie, searchBySeries, languageName));
for (String query : querySet) {
// search and filter by movie/series as required
Stream<SubtitleSearchResult> searchResults = service.search(query).stream().filter((it) -> {
return (searchByMovie && it.isMovie()) || (searchBySeries && it.isSeries());
});
resultSet.addAll(findProbableSearchResults(query, searchResults::iterator, querySet.size() == 1 ? 4 : 2));
resultSet.addAll(filterProbableSearchResults(query, searchResults::iterator, querySet.size() == 1 ? 4 : 2));
}
// fetch subtitles for all search results
@ -213,9 +219,24 @@ public final class SubtitleUtilities {
return subtitles;
}
protected static Collection<SubtitleSearchResult> findProbableSearchResults(String query, Iterable<SubtitleSearchResult> searchResults, int limit) {
/**
 * Runs every query against the given service and collects the most probable
 * movie / series entries for each of them.
 *
 * @param service        subtitle provider that is searched
 * @param querySet       queries to run, one search per query
 * @param searchByMovie  keep results for which {@code isMovie()} is true
 * @param searchBySeries keep results for which {@code isSeries()} is true
 * @param languageName   not used by this method
 * @return probable results of all queries, in query order (may contain duplicates)
 * @throws Exception if a search fails
 */
protected static List<SubtitleSearchResult> findProbableSearchResults(SubtitleProvider service, Collection<String> querySet, boolean searchByMovie, boolean searchBySeries, String languageName) throws Exception {
	List<SubtitleSearchResult> probableResults = new ArrayList<SubtitleSearchResult>();

	for (String query : querySet) {
		// only keep the kinds of entries (movie and / or series) that were asked for
		Stream<SubtitleSearchResult> candidates = service.search(query).stream().filter(it -> (searchByMovie && it.isMovie()) || (searchBySeries && it.isSeries()));

		// a lone query is filtered with limit 4, each of several queries with limit 2
		int limit = querySet.size() == 1 ? 4 : 2;
		probableResults.addAll(filterProbableSearchResults(query, candidates::iterator, limit));
	}

	return probableResults;
}
protected static List<SubtitleSearchResult> filterProbableSearchResults(String query, Iterable<SubtitleSearchResult> searchResults, int limit) {
// auto-select most probable search result
Set<SubtitleSearchResult> probableMatches = new LinkedHashSet<SubtitleSearchResult>();
List<SubtitleSearchResult> probableMatches = new ArrayList<SubtitleSearchResult>();
// use name similarity metric
SimilarityMetric metric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric());

View File

@ -60,6 +60,7 @@ import net.filebot.util.ui.EmptySelectionModel;
import net.filebot.web.Movie;
import net.filebot.web.OpenSubtitlesClient;
import net.filebot.web.SearchResult;
import net.filebot.web.SubtitleSearchResult;
import net.filebot.web.TheTVDBSearchResult;
import net.filebot.web.TheTVDBSeriesInfo;
import net.filebot.web.VideoHashSubtitleService.CheckResult;
@ -163,7 +164,7 @@ public class SubtitleUploadDialog extends JDialog {
File video = mapping.getVideo() != null ? mapping.getVideo() : mapping.getSubtitle();
String input = showInputDialog("Enter movie / series name:", stripReleaseInfo(FileUtilities.getName(video)), String.format("%s/%s", video.getParentFile().getName(), video.getName()), SubtitleUploadDialog.this);
if (input != null && input.length() > 0) {
List<Movie> options = database.searchIMDB(input);
List<SubtitleSearchResult> options = database.searchIMDB(input);
if (options.size() > 0) {
SelectDialog<Movie> dialog = new SelectDialog<Movie>(SubtitleUploadDialog.this, options);
dialog.setLocation(getOffsetLocation(dialog.getOwner()));

View File

@ -92,6 +92,25 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS
throw new UnsupportedOperationException(); // XMLRPC::SearchMoviesOnIMDB is not allowed due to abuse
}
/**
 * Asks the service which movie / series a release tag most likely refers to.
 * Results are cached per tag under the "guess" method key, so the remote call
 * (and the login it requires) only happens on a cache miss.
 *
 * @param tag release tag to identify
 * @return guessed search results; an empty list if the response has no entry for the tag
 * @throws Exception if login or the remote call fails
 */
@Override
public List<SubtitleSearchResult> guess(String tag) throws Exception {
	List<SubtitleSearchResult> results = getCache().getSearchResult("guess", tag);
	if (results != null) {
		return results;
	}

	// require login
	login();

	// the response is keyed by tag; a missing key means no guesses
	results = xmlrpc.guessMovie(singleton(tag)).getOrDefault(tag, emptyList());

	getCache().putSearchResult("guess", tag, results);
	return results;
}
@Override
public synchronized List<SubtitleDescriptor> getSubtitleList(SubtitleSearchResult searchResult, String languageName) throws Exception {
List<SubtitleDescriptor> subtitles = getCache().getSubtitleDescriptorList(searchResult, languageName);
@ -350,11 +369,11 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS
throw new UnsupportedOperationException();
}
public synchronized List<Movie> searchIMDB(String query) throws Exception {
public synchronized List<SubtitleSearchResult> searchIMDB(String query) throws Exception {
// search for movies and series
List<SearchResult> result = getCache().getSearchResult("search", query, null);
List<SubtitleSearchResult> result = getCache().getSearchResult("search", query);
if (result != null) {
return (List) result;
return (List<SubtitleSearchResult>) result;
}
// require login
@ -362,15 +381,14 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS
try {
// search for movies / series
List<Movie> resultSet = xmlrpc.searchMoviesOnIMDB(query);
result = asList(resultSet.toArray(new SearchResult[0]));
result = xmlrpc.searchMoviesOnIMDB(query);
} catch (ClassCastException e) {
// unexpected xmlrpc responses (e.g. error messages instead of results) will trigger this
throw new XmlRpcException("Illegal XMLRPC response on searchMoviesOnIMDB");
}
getCache().putSearchResult("search", query, null, result);
return (List) result;
getCache().putSearchResult("search", query, result);
return result;
}
@Override
@ -608,9 +626,9 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS
return query == null ? null : query.trim().toLowerCase();
}
public <T extends SearchResult> List<T> putSearchResult(String method, String query, Locale locale, List<T> value) {
public <T extends SubtitleSearchResult> List<T> putSearchResult(String method, String query, List<T> value) {
try {
cache.put(new Key(id, normalize(query)), value.toArray(new SearchResult[0]));
cache.put(new Key(id, method, normalize(query)), value.toArray(new SubtitleSearchResult[0]));
} catch (Exception e) {
Logger.getLogger(OpenSubtitlesClient.class.getName()).log(Level.WARNING, e.getMessage());
}
@ -619,9 +637,9 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS
}
@SuppressWarnings("unchecked")
public List<SearchResult> getSearchResult(String method, String query, Locale locale) {
public List<SubtitleSearchResult> getSearchResult(String method, String query) {
try {
SearchResult[] array = cache.get(new Key(id, normalize(query)), SearchResult[].class);
SubtitleSearchResult[] array = cache.get(new Key(id, method, normalize(query)), SubtitleSearchResult[].class);
if (array != null) {
return Arrays.asList(array);
}

View File

@ -112,11 +112,11 @@ public class OpenSubtitlesXmlRpc {
}
@SuppressWarnings("unchecked")
public List<Movie> searchMoviesOnIMDB(String query) throws XmlRpcFault {
public List<SubtitleSearchResult> searchMoviesOnIMDB(String query) throws XmlRpcFault {
Map<?, ?> response = invoke("SearchMoviesOnIMDB", token, query);
List<Map<String, String>> movieData = (List<Map<String, String>>) response.get("data");
List<Movie> movies = new ArrayList<Movie>();
List<SubtitleSearchResult> movies = new ArrayList<SubtitleSearchResult>();
// title pattern
Pattern pattern = Pattern.compile("(.+)[(](\\d{4})([/]I+)?[)]");
@ -135,7 +135,7 @@ public class OpenSubtitlesXmlRpc {
String name = matcher.group(1).replaceAll("\"", "").trim();
int year = Integer.parseInt(matcher.group(2));
movies.add(new Movie(name, year, Integer.parseInt(imdbid), -1));
movies.add(new SubtitleSearchResult(Integer.parseInt(imdbid), name, year, null, -1));
} catch (Exception e) {
Logger.getLogger(OpenSubtitlesXmlRpc.class.getName()).log(Level.FINE, String.format("Ignore movie [%s]: %s", movie, e.getMessage()));
}
@ -233,6 +233,37 @@ public class OpenSubtitlesXmlRpc {
return subHashMap;
}
/**
 * Invokes the "GuessMovie" XML-RPC method for the given tags and converts the
 * response into search results.
 *
 * Every requested tag gets an entry in the returned map (possibly an empty
 * list), unless the response "data" is not a map, in which case the returned
 * map is empty. Matches whose IMDb id, year or score is not a valid integer
 * are skipped and logged at FINE level instead of failing the whole call.
 *
 * @param tags release tags to identify
 * @return guessed search results by tag
 * @throws XmlRpcFault if the remote call fails
 */
@SuppressWarnings("unchecked")
public Map<String, List<SubtitleSearchResult>> guessMovie(Collection<String> tags) throws XmlRpcFault {
	Map<String, List<SubtitleSearchResult>> results = new HashMap<String, List<SubtitleSearchResult>>();

	Map<?, ?> response = invoke("GuessMovie", token, tags);
	Object payload = response.get("data");

	// anything other than a map is treated as "no guesses"
	if (payload instanceof Map) {
		Map<String, List<Map<String, ?>>> guessMovieData = (Map<String, List<Map<String, ?>>>) payload;

		for (String tag : tags) {
			List<SubtitleSearchResult> value = new ArrayList<SubtitleSearchResult>();

			List<Map<String, ?>> matches = guessMovieData.get(tag);
			if (matches != null) {
				for (Map<String, ?> match : matches) {
					try {
						String name = String.valueOf(match.get("MovieName"));
						String kind = String.valueOf(match.get("MovieKind"));
						int imdbid = Integer.parseInt(String.valueOf(match.get("IDMovieIMDB")));
						int year = Integer.parseInt(String.valueOf(match.get("MovieYear")));
						int score = Integer.parseInt(String.valueOf(match.get("score")));

						value.add(new SubtitleSearchResult(imdbid, name, year, kind, score));
					} catch (NumberFormatException e) {
						// one malformed entry must not discard the remaining guesses
						Logger.getLogger(OpenSubtitlesXmlRpc.class.getName()).log(Level.FINE, String.format("Ignore guess [%s]: %s", match, e.getMessage()));
					}
				}
			}

			results.put(tag, value);
		}
	}

	return results;
}
@SuppressWarnings("unchecked")
public Map<String, Movie> checkMovieHash(Collection<String> hashes, int minSeenCount) throws XmlRpcFault {
Map<String, Movie> movieHashMap = new HashMap<String, Movie>();

View File

@ -9,6 +9,8 @@ public interface SubtitleProvider {
public List<SubtitleSearchResult> search(String query) throws Exception;
public List<SubtitleSearchResult> guess(String tag) throws Exception;
public List<SubtitleDescriptor> getSubtitleList(SubtitleSearchResult searchResult, String languageName) throws Exception;
public URI getSubtitleListLink(SubtitleSearchResult searchResult, String languageName);

View File

@ -4,20 +4,36 @@ import java.util.Locale;
public class SubtitleSearchResult extends Movie {
public static final char KIND_MOVIE = 'm';
public static final char KIND_SERIES = 's';
enum Kind {
Movie, Series, Other, Unkown;
private char kind;
/**
 * Maps a kind label to its constant, ignoring case.
 *
 * @param s kind label, may be null
 * @return {@code Unkown} for null or empty input, {@code Movie} for "m" / "movie",
 *         {@code Series} for "s" / "tv series", {@code Other} for anything else
 */
public static Kind forName(String s) {
	// no label at all
	if (s == null || s.isEmpty()) {
		return Unkown;
	}

	boolean movieLabel = s.equalsIgnoreCase("m") || s.equalsIgnoreCase("movie");
	if (movieLabel) {
		return Movie;
	}

	boolean seriesLabel = s.equalsIgnoreCase("s") || s.equalsIgnoreCase("tv series");
	return seriesLabel ? Series : Other;
}
}
private Kind kind;
private int score;
public SubtitleSearchResult(int imdbId, String name, int year, char kind, int score) {
super(name, null, year, imdbId, -1, Locale.ENGLISH);
/**
 * Convenience constructor for results identified by IMDb id.
 * Delegates to the full constructor with no alias names, a tmdbId of -1,
 * {@code Locale.ENGLISH}, and the kind label resolved via {@code Kind.forName}.
 *
 * @param imdbId IMDb id of the entry
 * @param name   name of the entry
 * @param year   year of the entry
 * @param kind   kind label, converted with {@code Kind.forName}
 * @param score  score stored with this result
 */
public SubtitleSearchResult(int imdbId, String name, int year, String kind, int score) {
this(name, null, year, imdbId, -1, Locale.ENGLISH, Kind.forName(kind), score);
}
/**
 * Full constructor. The name, alias names, year, ids and locale are handed to
 * the {@code Movie} superclass constructor unchanged; kind and score are stored
 * on this object.
 *
 * @param kind  kind of this result
 * @param score score stored with this result
 */
public SubtitleSearchResult(String name, String[] aliasNames, int year, int imdbId, int tmdbId, Locale locale, Kind kind, int score) {
super(name, aliasNames, year, imdbId, tmdbId, locale);
this.kind = kind;
this.score = score;
}
public char getKind() {
public Kind getKind() {
return kind;
}
@ -26,11 +42,11 @@ public class SubtitleSearchResult extends Movie {
}
public boolean isMovie() {
return kind == KIND_MOVIE;
return kind == Kind.Movie;
}
public boolean isSeries() {
return kind == KIND_SERIES;
return kind == Kind.Series;
}
}

View File

@ -27,9 +27,20 @@ public class OpenSubtitlesXmlRpcTest {
xmlrpc.loginAnonymous();
}
// the first guess for the tag is expected to be the series "How I Met Your Mother" (2005)
@Test
public void guessMovie() throws Exception {
	String tag = "himym.s13.e12";

	Map<String, List<SubtitleSearchResult>> guessesByTag = xmlrpc.guessMovie(singleton(tag));
	List<SubtitleSearchResult> guesses = guessesByTag.get(tag);
	SubtitleSearchResult first = guesses.get(0);

	assertEquals(460649, first.getImdbId());
	assertEquals("How I Met Your Mother", first.getName());
	assertEquals(2005, first.getYear());
	assertEquals("Series", first.getKind().toString());
}
@Test
public void search() throws Exception {
List<Movie> list = xmlrpc.searchMoviesOnIMDB("babylon 5");
List<SubtitleSearchResult> list = xmlrpc.searchMoviesOnIMDB("babylon 5");
Movie sample = list.get(0);
// check sample entry
@ -40,7 +51,7 @@ public class OpenSubtitlesXmlRpcTest {
@Test(expected = IndexOutOfBoundsException.class)
public void searchOST() throws Exception {
List<Movie> list = xmlrpc.searchMoviesOnIMDB("Linkin.Park.New.Divide.1280-720p.Transformers.Revenge.of.the.Fallen.ost");
List<SubtitleSearchResult> list = xmlrpc.searchMoviesOnIMDB("Linkin.Park.New.Divide.1280-720p.Transformers.Revenge.of.the.Fallen.ost");
// seek to OST entry, expect to fail
for (int i = 0; !list.get(i).getName().contains("Linkin.Park"); i++)