* trust OpenSubtitles hash matches, but only the ones with high SeenCount, and scrap any hash match that doesn't unambiguously match a single movie, i.e. scrap hash collisions

This commit is contained in:
Reinhard Pointner 2012-10-19 19:18:47 +00:00
parent e6d4115da6
commit 1163eb3f17
4 changed files with 47 additions and 24 deletions

View File

@ -205,7 +205,7 @@ def detectSeriesName(files, locale = Locale.ENGLISH) {
return names == null || names.isEmpty() ? null : names.toList()[0]
}
def detectMovie(movieFile, strict = true, queryLookupService = TheMovieDB, hashLookupService = null, locale = Locale.ENGLISH) {
def detectMovie(movieFile, strict = true, queryLookupService = TheMovieDB, hashLookupService = OpenSubtitles, locale = Locale.ENGLISH) {
def movies = MediaDetection.detectMovie(movieFile, hashLookupService, queryLookupService, locale, strict)
return movies == null || movies.isEmpty() ? null : movies.toList()[0]
}

View File

@ -281,7 +281,8 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS
for (int bn = 0; bn < ceil((float) hashes.size() / batchSize); bn++) {
List<String> batch = hashes.subList(bn * batchSize, min((bn * batchSize) + batchSize, hashes.size()));
for (Entry<String, Movie> it : xmlrpc.checkMovieHash(batch).entrySet()) {
int minSeenCount = 20; // avoid mismatches by only trusting hashes that have been confirmed numerous times
for (Entry<String, Movie> it : xmlrpc.checkMovieHash(batch, minSeenCount).entrySet()) {
result.put(hashMap.get(it.getKey()), it.getValue());
getCache().putData("getMovieDescriptor", it.getKey(), locale, it.getValue());
}

View File

@ -223,23 +223,45 @@ public class OpenSubtitlesXmlRpc {
@SuppressWarnings("unchecked")
public Map<String, Movie> checkMovieHash(Collection<String> hashes) throws XmlRpcFault {
Map<?, ?> response = invoke("CheckMovieHash", token, hashes);
Map<String, ?> movieHashData = (Map<String, ?>) response.get("data");
public Map<String, Movie> checkMovieHash(Collection<String> hashes, int minSeenCount) throws XmlRpcFault {
Map<String, Movie> movieHashMap = new HashMap<String, Movie>();
for (Entry<String, ?> entry : movieHashData.entrySet()) {
// empty associative arrays are deserialized as array, not as map
if (entry.getValue() instanceof Map) {
Map<String, String> info = (Map<String, String>) entry.getValue();
String hash = info.get("MovieHash");
String name = info.get("MovieName");
int year = Integer.parseInt(info.get("MovieYear"));
int imdb = Integer.parseInt(info.get("MovieImdbID"));
movieHashMap.put(hash, new Movie(name, year, imdb, -1));
Map<?, ?> response = invoke("CheckMovieHash2", token, hashes);
Object payload = response.get("data");
if (payload instanceof Map) {
Map<String, ?> movieHashData = (Map<String, ?>) payload;
for (Entry<String, ?> entry : movieHashData.entrySet()) {
// empty associative arrays are deserialized as array, not as map
if (entry.getValue() instanceof List) {
String hash = entry.getKey();
List<Movie> matches = new ArrayList<Movie>();
List<?> hashMatches = (List<?>) entry.getValue();
for (Object match : hashMatches) {
if (match instanceof Map) {
Map<String, String> info = (Map<String, String>) match;
int seenCount = Integer.parseInt(info.get("SeenCount"));
// require minimum SeenCount before this hash match is considered trusted
if (seenCount >= minSeenCount) {
String name = info.get("MovieName");
int year = Integer.parseInt(info.get("MovieYear"));
int imdb = Integer.parseInt(info.get("MovieImdbID"));
matches.add(new Movie(name, year, imdb, -1));
}
}
}
if (matches.size() == 1) {
// perfect unambiguous match
movieHashMap.put(hash, matches.get(0));
} else if (matches.size() > 1) {
// multiple hash matches => ignore all
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Ignore hash match due to hash collision: " + matches);
}
}
}
}

View File

@ -10,15 +10,15 @@ import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import net.sourceforge.filebot.web.OpenSubtitlesSubtitleDescriptor.Property;
import net.sourceforge.filebot.web.OpenSubtitlesXmlRpc.Query;
import net.sourceforge.filebot.web.OpenSubtitlesXmlRpc.SubFile;
import net.sourceforge.filebot.web.OpenSubtitlesXmlRpc.TryUploadResponse;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class OpenSubtitlesXmlRpcTest {
@ -35,7 +35,7 @@ public class OpenSubtitlesXmlRpcTest {
@Test
public void search() throws Exception {
List<Movie> list = xmlrpc.searchMoviesOnIMDB("babylon 5");
Movie sample = (Movie) list.get(0);
Movie sample = list.get(0);
// check sample entry
assertEquals("Babylon 5", sample.getName());
@ -131,7 +131,7 @@ public class OpenSubtitlesXmlRpcTest {
@Test
public void checkMovieHash() throws Exception {
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("d7aa0275cace4410"));
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("d7aa0275cace4410"), 0);
Movie movie = results.get("d7aa0275cace4410");
assertEquals("Iron Man", movie.getName());
@ -142,7 +142,7 @@ public class OpenSubtitlesXmlRpcTest {
@Test
public void checkMovieHashInvalid() throws Exception {
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("0123456789abcdef"));
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("0123456789abcdef"), 0);
// no movie info
assertTrue(results.isEmpty());