1
0
mirror of https://github.com/mitb-archive/filebot synced 2025-03-09 22:09:47 -04:00

* replace TheTVDB online search with local search as best as possible to improve search result ranking

This commit is contained in:
Reinhard Pointner 2013-10-13 14:50:45 +00:00
parent 071ee0f1b0
commit 2c91a3be2e
9 changed files with 104 additions and 85 deletions

View File

@ -1,13 +1,11 @@
package net.sourceforge.filebot; package net.sourceforge.filebot;
import static java.util.Arrays.*; import static java.util.Arrays.*;
import static java.util.Collections.*; import static java.util.Collections.*;
import static net.sourceforge.filebot.Settings.*; import static net.sourceforge.filebot.Settings.*;
import static net.sourceforge.filebot.media.MediaDetection.*;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
@ -20,7 +18,6 @@ import java.util.concurrent.Future;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import net.sourceforge.filebot.media.MediaDetection;
import net.sourceforge.filebot.web.AcoustID; import net.sourceforge.filebot.web.AcoustID;
import net.sourceforge.filebot.web.AnidbClient; import net.sourceforge.filebot.web.AnidbClient;
import net.sourceforge.filebot.web.AnidbSearchResult; import net.sourceforge.filebot.web.AnidbSearchResult;
@ -41,119 +38,107 @@ import net.sourceforge.filebot.web.TheTVDBClient;
import net.sourceforge.filebot.web.TheTVDBSearchResult; import net.sourceforge.filebot.web.TheTVDBSearchResult;
import net.sourceforge.filebot.web.VideoHashSubtitleService; import net.sourceforge.filebot.web.VideoHashSubtitleService;
/** /**
* Reuse the same web service client so login, cache, etc. can be shared. * Reuse the same web service client so login, cache, etc. can be shared.
*/ */
public final class WebServices { public final class WebServices {
// episode dbs // episode dbs
public static final TVRageClient TVRage = new TVRageClient(); public static final TVRageClient TVRage = new TVRageClient();
public static final AnidbClient AniDB = new AnidbClientWithLocalSearch(getApplicationName().toLowerCase(), 4); public static final AnidbClient AniDB = new AnidbClientWithLocalSearch(getApplicationName().toLowerCase(), 4);
public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey")); public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey"));
// extended TheTVDB module with local search // extended TheTVDB module with local search
public static final TheTVDBClientWithLocalSearch TheTVDB = new TheTVDBClientWithLocalSearch(getApplicationProperty("thetvdb.apikey")); public static final TheTVDBClientWithLocalSearch TheTVDB = new TheTVDBClientWithLocalSearch(getApplicationProperty("thetvdb.apikey"));
// movie dbs // movie dbs
public static final IMDbClient IMDb = new IMDbClient(); public static final IMDbClient IMDb = new IMDbClient();
public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey")); public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey"));
// subtitle dbs // subtitle dbs
public static final OpenSubtitlesClient OpenSubtitles = new OpenSubtitlesClient(String.format("%s %s", getApplicationName(), getApplicationVersion())); public static final OpenSubtitlesClient OpenSubtitles = new OpenSubtitlesClient(String.format("%s %s", getApplicationName(), getApplicationVersion()));
// misc // misc
public static final FanartTV FanartTV = new FanartTV(Settings.getApplicationProperty("fanart.tv.apikey")); public static final FanartTV FanartTV = new FanartTV(Settings.getApplicationProperty("fanart.tv.apikey"));
public static final AcoustID AcoustID = new AcoustID(Settings.getApplicationProperty("acoustid.apikey")); public static final AcoustID AcoustID = new AcoustID(Settings.getApplicationProperty("acoustid.apikey"));
public static EpisodeListProvider[] getEpisodeListProviders() { public static EpisodeListProvider[] getEpisodeListProviders() {
return new EpisodeListProvider[] { TheTVDB, AniDB, TVRage, Serienjunkies }; return new EpisodeListProvider[] { TheTVDB, AniDB, TVRage, Serienjunkies };
} }
public static MovieIdentificationService[] getMovieIdentificationServices() { public static MovieIdentificationService[] getMovieIdentificationServices() {
return new MovieIdentificationService[] { TMDb, IMDb, OpenSubtitles }; return new MovieIdentificationService[] { TMDb, IMDb, OpenSubtitles };
} }
public static SubtitleProvider[] getSubtitleProviders() { public static SubtitleProvider[] getSubtitleProviders() {
return new SubtitleProvider[] { OpenSubtitles }; return new SubtitleProvider[] { OpenSubtitles };
} }
public static VideoHashSubtitleService[] getVideoHashSubtitleServices() { public static VideoHashSubtitleService[] getVideoHashSubtitleServices() {
return new VideoHashSubtitleService[] { OpenSubtitles }; return new VideoHashSubtitleService[] { OpenSubtitles };
} }
public static MusicIdentificationService[] getMusicIdentificationServices() { public static MusicIdentificationService[] getMusicIdentificationServices() {
return new MusicIdentificationService[] { AcoustID, new ID3Lookup() }; return new MusicIdentificationService[] { AcoustID, new ID3Lookup() };
} }
public static EpisodeListProvider getEpisodeListProvider(String name) { public static EpisodeListProvider getEpisodeListProvider(String name) {
for (EpisodeListProvider it : WebServices.getEpisodeListProviders()) { for (EpisodeListProvider it : WebServices.getEpisodeListProviders()) {
if (it.getName().equalsIgnoreCase(name)) if (it.getName().equalsIgnoreCase(name))
return it; return it;
} }
return null; // default return null; // default
} }
public static MovieIdentificationService getMovieIdentificationService(String name) { public static MovieIdentificationService getMovieIdentificationService(String name) {
for (MovieIdentificationService it : getMovieIdentificationServices()) { for (MovieIdentificationService it : getMovieIdentificationServices()) {
if (it.getName().equalsIgnoreCase(name)) if (it.getName().equalsIgnoreCase(name))
return it; return it;
} }
return null; // default return null; // default
} }
public static MusicIdentificationService getMusicIdentificationService(String name) { public static MusicIdentificationService getMusicIdentificationService(String name) {
for (MusicIdentificationService it : getMusicIdentificationServices()) { for (MusicIdentificationService it : getMusicIdentificationServices()) {
if (it.getName().equalsIgnoreCase(name)) if (it.getName().equalsIgnoreCase(name))
return it; return it;
} }
return null; // default return null; // default
} }
public static class TheTVDBClientWithLocalSearch extends TheTVDBClient { public static class TheTVDBClientWithLocalSearch extends TheTVDBClient {
public TheTVDBClientWithLocalSearch(String apikey) { public TheTVDBClientWithLocalSearch(String apikey) {
super(apikey); super(apikey);
} }
// index of local thetvdb data dump // index of local thetvdb data dump
private static LocalSearch<SearchResult> localIndex; private static LocalSearch<SearchResult> localIndex;
public synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException { public synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
if (localIndex == null) { if (localIndex == null) {
// fetch data dump // fetch data dump
TheTVDBSearchResult[] data = MediaDetection.releaseInfo.getTheTVDBIndex(); TheTVDBSearchResult[] data = releaseInfo.getTheTVDBIndex();
// index data dump // index data dump
localIndex = new LocalSearch<SearchResult>(asList(data)) { localIndex = new LocalSearch<SearchResult>(asList(data)) {
@Override @Override
protected Set<String> getFields(SearchResult object) { protected Set<String> getFields(SearchResult object) {
return set(object.getNames()); return set(object.getEffectiveNames());
} }
}; };
// make local search more restrictive // make local search more restrictive
localIndex.setResultMinimumSimilarity(0.7f); localIndex.setResultMinimumSimilarity(0.7f);
} }
return localIndex; return localIndex;
} }
public SeriesInfo getSeriesInfoByLocalIndex(String name, Locale locale) throws Exception { public SeriesInfo getSeriesInfoByLocalIndex(String name, Locale locale) throws Exception {
List<SearchResult> results = getLocalIndex().search(name); List<SearchResult> results = getLocalIndex().search(name);
if (results.size() > 0) { if (results.size() > 0) {
@ -161,20 +146,19 @@ public final class WebServices {
} }
return null; return null;
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Override @Override
public List<SearchResult> fetchSearchResult(final String query, final Locale locale) throws Exception { public List<SearchResult> fetchSearchResult(final String query, final Locale locale) throws Exception {
Callable<List<SearchResult>> apiSearch = new Callable<List<SearchResult>>() { Callable<List<SearchResult>> apiSearch = new Callable<List<SearchResult>>() {
@Override @Override
public List<SearchResult> call() throws Exception { public List<SearchResult> call() throws Exception {
return TheTVDBClientWithLocalSearch.super.fetchSearchResult(query, locale); return TheTVDBClientWithLocalSearch.super.fetchSearchResult(query, locale);
} }
}; };
Callable<List<SearchResult>> localSearch = new Callable<List<SearchResult>>() { Callable<List<SearchResult>> localSearch = new Callable<List<SearchResult>>() {
@Override @Override
public List<SearchResult> call() throws Exception { public List<SearchResult> call() throws Exception {
try { try {
@ -182,17 +166,16 @@ public final class WebServices {
} catch (Exception e) { } catch (Exception e) {
Logger.getLogger(TheTVDBClientWithLocalSearch.class.getName()).log(Level.SEVERE, e.getMessage(), e); Logger.getLogger(TheTVDBClientWithLocalSearch.class.getName()).log(Level.SEVERE, e.getMessage(), e);
} }
// let local search fail gracefully without affecting API search // let local search fail gracefully without affecting API search
return emptyList(); return emptyList();
} }
}; };
ExecutorService executor = Executors.newFixedThreadPool(2); ExecutorService executor = Executors.newFixedThreadPool(2);
try { try {
Set<SearchResult> results = new LinkedHashSet<SearchResult>(); Set<SearchResult> results = new LinkedHashSet<SearchResult>();
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(localSearch, apiSearch))) {
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(apiSearch, localSearch))) {
try { try {
results.addAll(resultSet.get()); results.addAll(resultSet.get());
} catch (ExecutionException e) { } catch (ExecutionException e) {
@ -201,35 +184,32 @@ public final class WebServices {
} }
} }
} }
return new ArrayList<SearchResult>(results); return sortBySimilarity(results, singleton(query), getSeriesMatchMetric(), false);
} finally { } finally {
executor.shutdownNow(); executor.shutdownNow();
} }
}; };
} }
public static class AnidbClientWithLocalSearch extends AnidbClient { public static class AnidbClientWithLocalSearch extends AnidbClient {
public AnidbClientWithLocalSearch(String client, int clientver) { public AnidbClientWithLocalSearch(String client, int clientver) {
super(client, clientver); super(client, clientver);
} }
@Override @Override
public List<AnidbSearchResult> getAnimeTitles() throws Exception { public List<AnidbSearchResult> getAnimeTitles() throws Exception {
return asList(MediaDetection.releaseInfo.getAnidbIndex()); return asList(releaseInfo.getAnidbIndex());
} }
} }
/** /**
* Dummy constructor to prevent instantiation. * Dummy constructor to prevent instantiation.
*/ */
private WebServices() { private WebServices() {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
/** /**
* Initialize client settings from system properties * Initialize client settings from system properties
*/ */
@ -237,13 +217,11 @@ public final class WebServices {
String[] osdbLogin = getLogin("osdb.user"); String[] osdbLogin = getLogin("osdb.user");
OpenSubtitles.setUser(osdbLogin[0], osdbLogin[1]); OpenSubtitles.setUser(osdbLogin[0], osdbLogin[1]);
} }
public static String[] getLogin(String key) { public static String[] getLogin(String key) {
return Settings.forPackage(WebServices.class).get(key, ":").split(":", 2); return Settings.forPackage(WebServices.class).get(key, ":").split(":", 2);
} }
public static void setLogin(String id, String user, String password) { public static void setLogin(String id, String user, String password) {
Settings settings = Settings.forPackage(WebServices.class); Settings settings = Settings.forPackage(WebServices.class);
String value = user.length() > 0 && password.length() > 0 ? user + ":" + password : null; String value = user.length() > 0 && password.length() > 0 ? user + ":" + password : null;
@ -251,7 +229,7 @@ public final class WebServices {
user = ""; user = "";
password = ""; password = "";
} }
if (id.equals("osdb.user")) { if (id.equals("osdb.user")) {
settings.put(id, value); settings.put(id, value);
OpenSubtitles.setUser(user, password); OpenSubtitles.setUser(user, password);
@ -259,5 +237,5 @@ public final class WebServices {
throw new IllegalArgumentException(); throw new IllegalArgumentException();
} }
} }
} }

View File

@ -391,7 +391,7 @@ public class MediaDetection {
try { try {
for (SearchResult[] index : new SearchResult[][] { releaseInfo.getTheTVDBIndex(), releaseInfo.getAnidbIndex() }) { for (SearchResult[] index : new SearchResult[][] { releaseInfo.getTheTVDBIndex(), releaseInfo.getAnidbIndex() }) {
for (SearchResult item : index) { for (SearchResult item : index) {
for (String name : item.getNames()) { for (String name : item.getEffectiveNames()) {
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(name).toLowerCase(), item)); seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(name).toLowerCase(), item));
} }
} }
@ -541,7 +541,7 @@ public class MediaDetection {
// skip further queries if collected matches are already sufficient // skip further queries if collected matches are already sufficient
if (options.size() > 0 && movieNameMatches.size() > 0) { if (options.size() > 0 && movieNameMatches.size() > 0) {
options.addAll(movieNameMatches); options.addAll(movieNameMatches);
return sortBySimilarity(options, terms); return sortBySimilarity(options, terms, getMovieMatchMetric(), true);
} }
// if matching name+year failed, try matching only by name // if matching name+year failed, try matching only by name
@ -590,7 +590,7 @@ public class MediaDetection {
options.addAll(movieNameMatches); options.addAll(movieNameMatches);
// sort by relevance // sort by relevance
return sortBySimilarity(options, terms); return sortBySimilarity(options, terms, getMovieMatchMetric(), true);
} }
public static SimilarityMetric getMovieMatchMetric() { public static SimilarityMetric getMovieMatchMetric() {
@ -615,18 +615,43 @@ public class MediaDetection {
}); });
} }
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms) throws IOException { public static SimilarityMetric getSeriesMatchMetric() {
Collection<String> paragon = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER); return new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new SequenceMatchSimilarity(0, true));
paragon.addAll(stripReleaseInfo(terms, true)); }
paragon.addAll(stripReleaseInfo(terms, false));
List<T> sorted = new ArrayList<T>(options); public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms, SimilarityMetric metric, boolean stripReleaseInfo) throws IOException {
sort(sorted, new SimilarityComparator(getMovieMatchMetric(), paragon.toArray())); Collection<String> paragon = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
// clean clutter tokens if required
if (stripReleaseInfo) {
paragon.addAll(stripReleaseInfo(terms, true));
paragon.addAll(stripReleaseInfo(terms, false));
} else {
paragon.addAll(terms);
}
// similarity comparator with multi-value support
SimilarityComparator comparator = new SimilarityComparator(metric, paragon.toArray()) {
@Override
public float getMaxSimilarity(Object obj) {
float f = 0;
Collection<?> names = obj instanceof SearchResult ? ((SearchResult) obj).getEffectiveNames() : singleton(obj);
for (Object it : names) {
f = Math.max(f, super.getMaxSimilarity(it));
}
return f;
}
};
// sort output array
List<T> result = new ArrayList<T>(options);
sort(result, comparator);
// DEBUG // DEBUG
// System.out.format("sortBySimilarity %s => %s", terms, sorted); // System.out.format("sortBySimilarity %s => %s%n", terms, result);
return sorted; return result;
} }
public static String reduceMovieName(String name, boolean strict) throws IOException { public static String reduceMovieName(String name, boolean strict) throws IOException {

View File

@ -200,7 +200,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
LinkedHashSet<String> set = new LinkedHashSet<String>(4); LinkedHashSet<String> set = new LinkedHashSet<String>(4);
set.add(removeTrailingBrackets(episode.getSeriesName())); set.add(removeTrailingBrackets(episode.getSeriesName()));
set.add(removeTrailingBrackets(episode.getTitle())); set.add(removeTrailingBrackets(episode.getTitle()));
for (String it : episode.getSeries().getNames()) { for (String it : episode.getSeries().getEffectiveNames()) {
set.add(removeTrailingBrackets(it)); set.add(removeTrailingBrackets(it));
} }

View File

@ -79,7 +79,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
@Override @Override
protected Set<String> getFields(SearchResult it) { protected Set<String> getFields(SearchResult it) {
return set(it.getNames()); return set(it.getEffectiveNames());
} }
}; };

View File

@ -1,6 +1,8 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
public class Movie extends SearchResult { public class Movie extends SearchResult {
@ -39,6 +41,16 @@ public class Movie extends SearchResult {
return tmdbId; return tmdbId;
} }
/**
 * Collects the display forms of this movie's primary name and of each alias.
 * Every entry is produced by {@code toString(String, int)} using this movie's
 * year; the primary name comes first, followed by the aliases in array order.
 *
 * @return a newly allocated, mutable list holding {@code 1 + aliasNames.length} entries
 */
@Override
public List<String> getEffectiveNames() {
	// presize for the primary name plus one slot per alias
	final List<String> effectiveNames = new ArrayList<String>(1 + aliasNames.length);

	// primary name always leads the list
	effectiveNames.add(toString(name, year));

	// aliases follow, each paired with the same year as the primary name
	for (int i = 0; i < aliasNames.length; i++) {
		effectiveNames.add(toString(aliasNames[i], year));
	}

	return effectiveNames;
}
@Override @Override
public boolean equals(Object object) { public boolean equals(Object object) {
if (object instanceof Movie) { if (object instanceof Movie) {
@ -67,6 +79,10 @@ public class Movie extends SearchResult {
@Override @Override
public String toString() { public String toString() {
return toString(name, year);
}
private static String toString(String name, int year) {
return String.format("%s (%04d)", name, year < 0 ? 0 : year); return String.format("%s (%04d)", name, year < 0 ? 0 : year);
} }

View File

@ -26,7 +26,7 @@ public abstract class SearchResult implements Serializable {
return aliasNames.clone(); return aliasNames.clone();
} }
public List<String> getNames() { public List<String> getEffectiveNames() {
return new AbstractList<String>() { return new AbstractList<String>() {
@Override @Override

View File

@ -65,7 +65,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
@Override @Override
protected Set<String> getFields(SearchResult series) { protected Set<String> getFields(SearchResult series) {
return set(series.getNames()); return set(series.getEffectiveNames());
} }
}; };

View File

@ -28,8 +28,8 @@ public class SerienjunkiesClientTest {
assertEquals(34, series.getSeriesId()); assertEquals(34, series.getSeriesId());
assertEquals("Alias", series.getLink()); assertEquals("Alias", series.getLink());
assertEquals("Alias - Die Agentin", series.getName()); assertEquals("Alias - Die Agentin", series.getName());
assertEquals("Alias", series.getNames().get(1)); assertEquals("Alias", series.getEffectiveNames().get(1));
assertEquals("Alias - Die Agentin", series.getNames().get(0)); assertEquals("Alias - Die Agentin", series.getEffectiveNames().get(0));
assertEquals("2001-09-30", series.getStartDate().toString()); assertEquals("2001-09-30", series.getStartDate().toString());
} }

View File

@ -377,7 +377,7 @@ C-N.NTFS
C-N.NTFS.No C-N.NTFS.No
C-P-S C-P-S
C-Subs C-Subs
C0NFUSED c0nFuSed
c0re c0re
C1 C1
C4DVD C4DVD