mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-11 05:48:01 -05:00
* trust OpenSubtitles hash matches, but only the ones with high SeenCount, and scrap any hash match that doesn't unambiguously match a single movie, i.e. scrap hash collisions
This commit is contained in:
parent
e6d4115da6
commit
1163eb3f17
@ -205,7 +205,7 @@ def detectSeriesName(files, locale = Locale.ENGLISH) {
|
||||
return names == null || names.isEmpty() ? null : names.toList()[0]
|
||||
}
|
||||
|
||||
def detectMovie(movieFile, strict = true, queryLookupService = TheMovieDB, hashLookupService = null, locale = Locale.ENGLISH) {
|
||||
def detectMovie(movieFile, strict = true, queryLookupService = TheMovieDB, hashLookupService = OpenSubtitles, locale = Locale.ENGLISH) {
|
||||
def movies = MediaDetection.detectMovie(movieFile, hashLookupService, queryLookupService, locale, strict)
|
||||
return movies == null || movies.isEmpty() ? null : movies.toList()[0]
|
||||
}
|
||||
|
@ -281,7 +281,8 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS
|
||||
for (int bn = 0; bn < ceil((float) hashes.size() / batchSize); bn++) {
|
||||
List<String> batch = hashes.subList(bn * batchSize, min((bn * batchSize) + batchSize, hashes.size()));
|
||||
|
||||
for (Entry<String, Movie> it : xmlrpc.checkMovieHash(batch).entrySet()) {
|
||||
int minSeenCount = 20; // avoid mismatches by only trusting hash matches that have been confirmed numerous times
|
||||
for (Entry<String, Movie> it : xmlrpc.checkMovieHash(batch, minSeenCount).entrySet()) {
|
||||
result.put(hashMap.get(it.getKey()), it.getValue());
|
||||
getCache().putData("getMovieDescriptor", it.getKey(), locale, it.getValue());
|
||||
}
|
||||
|
@ -223,23 +223,45 @@ public class OpenSubtitlesXmlRpc {
|
||||
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public Map<String, Movie> checkMovieHash(Collection<String> hashes) throws XmlRpcFault {
|
||||
Map<?, ?> response = invoke("CheckMovieHash", token, hashes);
|
||||
|
||||
Map<String, ?> movieHashData = (Map<String, ?>) response.get("data");
|
||||
public Map<String, Movie> checkMovieHash(Collection<String> hashes, int minSeenCount) throws XmlRpcFault {
|
||||
Map<String, Movie> movieHashMap = new HashMap<String, Movie>();
|
||||
|
||||
for (Entry<String, ?> entry : movieHashData.entrySet()) {
|
||||
// empty associative arrays are deserialized as array, not as map
|
||||
if (entry.getValue() instanceof Map) {
|
||||
Map<String, String> info = (Map<String, String>) entry.getValue();
|
||||
Map<?, ?> response = invoke("CheckMovieHash2", token, hashes);
|
||||
Object payload = response.get("data");
|
||||
|
||||
String hash = info.get("MovieHash");
|
||||
String name = info.get("MovieName");
|
||||
int year = Integer.parseInt(info.get("MovieYear"));
|
||||
int imdb = Integer.parseInt(info.get("MovieImdbID"));
|
||||
if (payload instanceof Map) {
|
||||
Map<String, ?> movieHashData = (Map<String, ?>) payload;
|
||||
for (Entry<String, ?> entry : movieHashData.entrySet()) {
|
||||
// empty associative arrays are deserialized as array, not as map
|
||||
if (entry.getValue() instanceof List) {
|
||||
String hash = entry.getKey();
|
||||
List<Movie> matches = new ArrayList<Movie>();
|
||||
|
||||
movieHashMap.put(hash, new Movie(name, year, imdb, -1));
|
||||
List<?> hashMatches = (List<?>) entry.getValue();
|
||||
for (Object match : hashMatches) {
|
||||
if (match instanceof Map) {
|
||||
Map<String, String> info = (Map<String, String>) match;
|
||||
int seenCount = Integer.parseInt(info.get("SeenCount"));
|
||||
|
||||
// require minimum SeenCount before this hash match is considered trusted
|
||||
if (seenCount >= minSeenCount) {
|
||||
String name = info.get("MovieName");
|
||||
int year = Integer.parseInt(info.get("MovieYear"));
|
||||
int imdb = Integer.parseInt(info.get("MovieImdbID"));
|
||||
|
||||
matches.add(new Movie(name, year, imdb, -1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matches.size() == 1) {
|
||||
// perfect unambiguous match
|
||||
movieHashMap.put(hash, matches.get(0));
|
||||
} else if (matches.size() > 1) {
|
||||
// multiple hash matches => ignore all
|
||||
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Ignore hash match due to hash collision: " + matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,15 +10,15 @@ import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import net.sourceforge.filebot.web.OpenSubtitlesSubtitleDescriptor.Property;
|
||||
import net.sourceforge.filebot.web.OpenSubtitlesXmlRpc.Query;
|
||||
import net.sourceforge.filebot.web.OpenSubtitlesXmlRpc.SubFile;
|
||||
import net.sourceforge.filebot.web.OpenSubtitlesXmlRpc.TryUploadResponse;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class OpenSubtitlesXmlRpcTest {
|
||||
|
||||
@ -35,7 +35,7 @@ public class OpenSubtitlesXmlRpcTest {
|
||||
@Test
|
||||
public void search() throws Exception {
|
||||
List<Movie> list = xmlrpc.searchMoviesOnIMDB("babylon 5");
|
||||
Movie sample = (Movie) list.get(0);
|
||||
Movie sample = list.get(0);
|
||||
|
||||
// check sample entry
|
||||
assertEquals("Babylon 5", sample.getName());
|
||||
@ -131,7 +131,7 @@ public class OpenSubtitlesXmlRpcTest {
|
||||
|
||||
@Test
|
||||
public void checkMovieHash() throws Exception {
|
||||
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("d7aa0275cace4410"));
|
||||
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("d7aa0275cace4410"), 0);
|
||||
Movie movie = results.get("d7aa0275cace4410");
|
||||
|
||||
assertEquals("Iron Man", movie.getName());
|
||||
@ -142,7 +142,7 @@ public class OpenSubtitlesXmlRpcTest {
|
||||
|
||||
@Test
|
||||
public void checkMovieHashInvalid() throws Exception {
|
||||
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("0123456789abcdef"));
|
||||
Map<String, Movie> results = xmlrpc.checkMovieHash(singleton("0123456789abcdef"), 0);
|
||||
|
||||
// no movie info
|
||||
assertTrue(results.isEmpty());
|
||||
|
Loading…
Reference in New Issue
Block a user