From 54d4dad95543ce4f14e38390e440fc12bef7d4a3 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Fri, 24 Jan 2014 16:01:37 +0000 Subject: [PATCH] * rewrite subtitle auto-selection (should work much better now for both strict and -non-strict modes) --- .../filebot/cli/CmdlineOperations.java | 93 +++++--------- .../filebot/media/MediaDetection.java | 19 +++ .../filebot/subtitle/SubtitleUtilities.java | 114 ++++++++++++++++++ .../ui/subtitle/SubtitleAutoMatchDialog.java | 91 +------------- .../filebot/ui/subtitle/SubtitlePanel.java | 15 +-- website/data/query-blacklist.txt | 4 +- 6 files changed, 172 insertions(+), 164 deletions(-) diff --git a/source/net/sourceforge/filebot/cli/CmdlineOperations.java b/source/net/sourceforge/filebot/cli/CmdlineOperations.java index 4cb5a47f..df5e1bf6 100644 --- a/source/net/sourceforge/filebot/cli/CmdlineOperations.java +++ b/source/net/sourceforge/filebot/cli/CmdlineOperations.java @@ -639,18 +639,24 @@ public class CmdlineOperations implements CmdlineInterface { final Language language = getLanguage(languageName); final Pattern databaseFilter = (db != null) ? Pattern.compile(db, Pattern.CASE_INSENSITIVE) : null; final SubtitleNaming naming = getSubtitleNaming(format); - CLILogger.finest(String.format("Get [%s] subtitles for %d files", language.getName(), files.size())); // when rewriting subtitles to target format an encoding must be defined, default to UTF-8 final Charset outputEncoding = (csn != null) ? Charset.forName(csn) : (output != null) ? Charset.forName("UTF-8") : null; final SubtitleFormat outputFormat = (output != null) ? getSubtitleFormatByName(output) : null; + // ignore anything that is not a video + files = filter(files, VIDEO_FILES); + + // ignore clutter files from processing + files = filter(files, not(getClutterFileFilter())); + // try to find subtitles for each video file - List remainingVideos = new ArrayList(filter(files, VIDEO_FILES)); + List remainingVideos = new ArrayList(files); // parallel download List subtitleFiles = new ArrayList(); + CLILogger.finest(String.format("Get [%s] subtitles for %d files", language.getName(), remainingVideos.size())); if (remainingVideos.isEmpty()) { throw new Exception("No video files: " + files); } @@ -672,49 +678,24 @@ public class CmdlineOperations implements CmdlineInterface { } } - // lookup subtitles via text search, only perform hash lookup in strict mode - if (!remainingVideos.isEmpty()) { - // auto-detect search query - Set querySet = new TreeSet(String.CASE_INSENSITIVE_ORDER); - - if (query == null) { - try { - List videoFiles = filter(files, VIDEO_FILES); - querySet.addAll(detectSeriesNames(videoFiles, true, false, language.getLocale())); - - // auto-detect movie names - for (File f : videoFiles) { - if (!isEpisode(f.getName(), false)) { - for (Movie movie : detectMovie(f, null, null, language.getLocale(), strict)) { - querySet.add(movie.getName()); - } - } - } - } catch (Exception e) { - CLILogger.warning("Movie detection failed: " + e.getMessage()); - } - - if (querySet.isEmpty()) { - throw new Exception("Failed to auto-detect query"); - } - } else { - querySet.add(query); + for (SubtitleProvider service : getSubtitleProviders()) { + if (remainingVideos.isEmpty() || (databaseFilter != null && !databaseFilter.matcher(service.getName()).matches())) { + continue; } - for (SubtitleProvider service : getSubtitleProviders()) { - if (remainingVideos.isEmpty() || (databaseFilter != null && !databaseFilter.matcher(service.getName()).matches())) { - continue; - } - - try { - CLILogger.fine(format("Searching for %s at [%s]", querySet, service.getName())); - Map subtitles = lookupSubtitleByFileName(service, querySet, language, remainingVideos, strict); - Map downloads = downloadSubtitleBatch(service.getName(), subtitles, outputFormat, outputEncoding, naming); - remainingVideos.removeAll(downloads.keySet()); - subtitleFiles.addAll(downloads.values()); - } catch (Exception e) { - CLILogger.warning(format("Search for %s failed: %s", querySet, e.getMessage())); + try { + CLILogger.fine(format("Looking up subtitles by name via %s", service.getName())); + Map subtitles = new TreeMap(); + for (Entry> it : findSubtitleMatches(service, remainingVideos, language.getName(), query, false, strict).entrySet()) { + if (it.getValue().size() > 0) { + subtitles.put(it.getKey(), it.getValue().get(0)); + } } + Map downloads = downloadSubtitleBatch(service.getName(), subtitles, outputFormat, outputEncoding, naming); + remainingVideos.removeAll(downloads.keySet()); + subtitleFiles.addAll(downloads.values()); + } catch (Exception e) { + CLILogger.warning(format("Search by name failed: %s", e.getMessage())); } } @@ -823,37 +804,21 @@ public class CmdlineOperations implements CmdlineInterface { } private Map lookupSubtitleByHash(VideoHashSubtitleService service, Language language, Collection videoFiles) throws Exception { - Map subtitleByVideo = new HashMap(videoFiles.size()); + Map subtitleByVideo = new TreeMap(); for (Entry> it : service.getSubtitleList(videoFiles.toArray(new File[0]), language.getName()).entrySet()) { - if (it.getValue() != null && it.getValue().size() > 0) { - // guess best hash match (default order is open bad due to invalid hash links) - Entry bestMatch = matchSubtitles(singleton(it.getKey()), it.getValue(), false).entrySet().iterator().next(); + // guess best hash match (default order is open bad due to invalid hash links) + SubtitleDescriptor bestMatch = getBestMatch(it.getKey(), it.getValue(), false); - CLILogger.finest(format("Matched [%s] to [%s] via filehash", bestMatch.getKey().getName(), bestMatch.getValue().getName())); - subtitleByVideo.put(bestMatch.getKey(), bestMatch.getValue()); + if (bestMatch != null) { + CLILogger.finest(format("Matched [%s] to [%s] via filehash", it.getKey().getName(), bestMatch.getName())); + subtitleByVideo.put(it.getKey(), bestMatch); } } return subtitleByVideo; } - private Map lookupSubtitleByFileName(SubtitleProvider service, Collection querySet, Language language, Collection videoFiles, boolean strict) throws Exception { - // search for subtitles - Set subtitles = findSubtitles(service, querySet, language.getName()); - - // match subtitle files to video files - if (subtitles.size() > 0) { - Map subtitleByVideo = matchSubtitles(videoFiles, subtitles, strict); - for (Entry it : subtitleByVideo.entrySet()) { - CLILogger.finest(format("Matched [%s] to [%s] via filename", it.getKey().getName(), it.getValue().getName())); - } - return subtitleByVideo; - } - - return emptyMap(); - } - private List applyExpressionFilter(Collection input, ExpressionFilter filter) throws Exception { if (filter == null) { return new ArrayList(input); diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index f278e974..2c5001c3 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -33,6 +33,7 @@ import java.util.Map.Entry; import java.util.NoSuchElementException; import java.util.Set; import java.util.SortedSet; +import java.util.TreeMap; import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger; @@ -955,6 +956,24 @@ public class MediaDetection { return mediaFolders; } + public static Map> mapBySeriesName(Collection files, boolean useSeriesIndex, boolean useAnimeIndex, Locale locale) throws Exception { + Map> result = new TreeMap>(String.CASE_INSENSITIVE_ORDER); + + for (File f : files) { + List names = detectSeriesNames(singleton(f), useSeriesIndex, useAnimeIndex, locale); + String key = names.isEmpty() ? "" : names.get(0); + + List value = result.get(key); + if (value == null) { + value = new ArrayList(); + result.put(key, value); + } + value.add(f); + } + + return result; + } + public static File guessMediaFolder(File file) { List tail = listPathTail(file, 3, true); diff --git a/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java b/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java index 14c8256a..b7b5c7e0 100644 --- a/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java +++ b/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java @@ -1,6 +1,7 @@ package net.sourceforge.filebot.subtitle; import static java.lang.Math.*; +import static java.util.Collections.*; import static net.sourceforge.filebot.MediaTypes.*; import static net.sourceforge.filebot.media.MediaDetection.*; import static net.sourceforge.filebot.similarity.EpisodeMetrics.*; @@ -15,16 +16,22 @@ import java.nio.CharBuffer; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; +import java.util.NoSuchElementException; import java.util.Set; +import java.util.TreeSet; import net.sourceforge.filebot.Language; +import net.sourceforge.filebot.similarity.EpisodeMetrics; import net.sourceforge.filebot.similarity.Match; import net.sourceforge.filebot.similarity.Matcher; import net.sourceforge.filebot.similarity.MetricAvg; @@ -34,12 +41,105 @@ import net.sourceforge.filebot.similarity.SequenceMatchSimilarity; import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.vfs.ArchiveType; import net.sourceforge.filebot.vfs.MemoryFile; +import net.sourceforge.filebot.web.Movie; import net.sourceforge.filebot.web.SearchResult; import net.sourceforge.filebot.web.SubtitleDescriptor; import net.sourceforge.filebot.web.SubtitleProvider; public final class SubtitleUtilities { + public static Map> findSubtitleMatches(SubtitleProvider service, Collection fileSet, String languageName, String forceQuery, boolean addOptions, boolean strict) throws Exception { + // ignore anything that is not a video + fileSet = filter(fileSet, VIDEO_FILES); + + // ignore clutter files from processing + fileSet = filter(fileSet, not(getClutterFileFilter())); + + // collect results + Map> subtitlesByFile = new HashMap>(); + + for (List byMediaFolder : mapByMediaFolder(fileSet).values()) { + for (Entry> bySeries : mapBySeriesName(byMediaFolder, true, false, Locale.ENGLISH).entrySet()) { + // auto-detect query and search for subtitles + Collection querySet = new TreeSet(String.CASE_INSENSITIVE_ORDER); + List files = bySeries.getValue(); + + if (forceQuery != null && forceQuery.length() > 0) { + querySet.add(forceQuery); + } else if (bySeries.getKey().length() > 0) { + // use auto-detected series name as query + querySet.add(bySeries.getKey()); + } else { + for (File f : files) { + List queries = new ArrayList(); + + // might be a movie, auto-detect movie names + if (!isEpisode(f.getPath(), true)) { + for (Movie it : detectMovie(f, null, null, Locale.ENGLISH, strict)) { + queries.add(it.getName()); + } + } + + if (queries.isEmpty()) { + queries.add(stripReleaseInfo(getName(f))); + } + + querySet.addAll(queries); + } + } + + Set subtitles = findSubtitles(service, querySet, languageName); + + // dialog may have been cancelled by now + if (Thread.interrupted()) { + throw new InterruptedException(); + } + + // files by possible subtitles matches + for (File file : files) { + subtitlesByFile.put(file, new ArrayList()); + } + + // add other possible matches to the options + SimilarityMetric sanity = EpisodeMetrics.verificationMetric(); + float minMatchSimilarity = strict ? 0.9f : 0.6f; + + // first match everything as best as possible, then filter possibly bad matches + for (Entry it : matchSubtitles(files, subtitles, strict).entrySet()) { + if (sanity.getSimilarity(it.getKey(), it.getValue()) >= minMatchSimilarity) { + subtitlesByFile.get(it.getKey()).add(it.getValue()); + } + } + + // this could be very slow, lets hope at this point there is not much left due to positive hash matches + for (File file : files) { + // add matching subtitles + for (SubtitleDescriptor it : subtitles) { + // grab only the first best option unless we really want all options + if (!addOptions && subtitlesByFile.get(file).size() >= 1) + continue; + + // ignore if it's already been added + if (subtitlesByFile.get(file).contains(it)) + continue; + + // ignore if we're sure that SxE is a negative match + if (isEpisode(it.getName(), true) && isEpisode(file.getPath(), true) && EpisodeMetrics.EpisodeFunnel.getSimilarity(file, it) < 1) + continue; + + // ignore if it's not similar enough + if (sanity.getSimilarity(file, it) < minMatchSimilarity) + continue; + + subtitlesByFile.get(file).add(it); + } + } + } + } + + return subtitlesByFile; + } + public static Map matchSubtitles(Collection files, Collection subtitles, boolean strict) throws InterruptedException { Map subtitleByVideo = new LinkedHashMap(); @@ -106,6 +206,20 @@ public final class SubtitleUtilities { return probableMatches; } + public static SubtitleDescriptor getBestMatch(File file, Collection subtitles, boolean strict) { + if (file == null || subtitles == null || subtitles.isEmpty()) { + return null; + } + + try { + return matchSubtitles(singleton(file), subtitles, strict).entrySet().iterator().next().getValue(); + } catch (NoSuchElementException e) { + return null; + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + /** * Detect charset and parse subtitle file even if extension is invalid */ diff --git a/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java b/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java index 06a54f47..c1393a67 100644 --- a/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java +++ b/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java @@ -1,12 +1,9 @@ package net.sourceforge.filebot.ui.subtitle; -import static java.util.Collections.*; import static javax.swing.BorderFactory.*; import static javax.swing.JOptionPane.*; -import static net.sourceforge.filebot.media.MediaDetection.*; import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*; import static net.sourceforge.tuned.FileUtilities.*; -import static net.sourceforge.tuned.StringUtilities.*; import static net.sourceforge.tuned.ui.TunedUtilities.*; import java.awt.Color; @@ -26,7 +23,6 @@ import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -67,7 +63,6 @@ import net.sourceforge.filebot.similarity.MetricCascade; import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.subtitle.SubtitleNaming; import net.sourceforge.filebot.vfs.MemoryFile; -import net.sourceforge.filebot.web.Movie; import net.sourceforge.filebot.web.SubtitleDescriptor; import net.sourceforge.filebot.web.SubtitleProvider; import net.sourceforge.filebot.web.VideoHashSubtitleService; @@ -192,18 +187,6 @@ class SubtitleAutoMatchDialog extends JDialog { servicePanel.add(component); } - // remember last user input - private List userQuery = new ArrayList(); - - protected List getUserQuery(String suggestion, String title, Component parent) throws Exception { - synchronized (userQuery) { - if (userQuery.isEmpty()) { - userQuery.addAll(showMultiValueInputDialog("Enter series / movie names:", suggestion, title, parent)); - } - return userQuery; - } - } - public void startQuery(String languageName) { final SubtitleMappingTableModel mappingModel = (SubtitleMappingTableModel) subtitleMappingTable.getModel(); QueryTask queryTask = new QueryTask(services, mappingModel.getVideoFiles(), languageName, SubtitleAutoMatchDialog.this) { @@ -735,10 +718,11 @@ class SubtitleAutoMatchDialog extends JDialog { Set subtitlesByRelevance = new LinkedHashSet(); // guess best hash match (default order is open bad due to invalid hash links) - if (result.getValue().size() > 0) { - Entry bestMatch = matchSubtitles(singleton(result.getKey()), result.getValue(), false).entrySet().iterator().next(); - subtitlesByRelevance.add(bestMatch.getValue()); + SubtitleDescriptor bestMatch = getBestMatch(result.getKey(), result.getValue(), false); + if (bestMatch != null) { + subtitlesByRelevance.add(bestMatch); } + subtitlesByRelevance.addAll(result.getValue()); // associate subtitles with services @@ -924,72 +908,7 @@ class SubtitleAutoMatchDialog extends JDialog { @Override protected Map> getSubtitleList(Collection fileSet, String languageName, Component parent) throws Exception { - // ignore clutter files from processing - fileSet = filter(fileSet, not(getClutterFileFilter())); - - // collect results - Map> subtitlesByFile = new HashMap>(); - - for (List files : mapByMediaFolder(fileSet).values()) { - // auto-detect query and search for subtitles - Collection querySet = new TreeSet(String.CASE_INSENSITIVE_ORDER); - - // auto-detect series names - querySet.addAll(detectSeriesNames(files, true, false, Locale.ROOT)); - - // auto-detect movie names - for (File f : files) { - if (!isEpisode(f.getName(), false)) { - for (Movie movie : detectMovie(f, null, null, Locale.ROOT, false)) { - querySet.add(movie.getName()); - } - } - } - - Set subtitles = findSubtitles(service, querySet, languageName); - - // dialog may have been cancelled by now - if (Thread.interrupted()) { - throw new CancellationException(); - } - - // if auto-detection fails, ask user for input - if (subtitles.isEmpty()) { - querySet = inputProvider.getUserQuery(join(querySet, ","), service.getName(), parent); - subtitles = findSubtitles(service, querySet, languageName); - - // still no luck... na women ye mei banfa - if (subtitles.isEmpty()) { - throw new Exception("Unable to lookup subtitles: " + querySet); - } - } - - // files by possible subtitles matches - for (File file : files) { - subtitlesByFile.put(file, new ArrayList()); - } - - // first match everything as best as possible, then filter possibly bad matches - for (Entry it : matchSubtitles(files, subtitles, false).entrySet()) { - subtitlesByFile.get(it.getKey()).add(it.getValue()); - } - - // add other possible matches to the options - SimilarityMetric sanity = EpisodeMetrics.verificationMetric(); - float minMatchSimilarity = 0.5f; - - // this could be very slow, lets hope at this point there is not much left due to positive hash matches - for (File file : files) { - // add matching subtitles - for (SubtitleDescriptor it : subtitles) { - if (!subtitlesByFile.get(file).contains(it) && sanity.getSimilarity(file, it) >= minMatchSimilarity) { - subtitlesByFile.get(file).add(it); - } - } - } - } - - return subtitlesByFile; + return findSubtitleMatches(service, fileSet, languageName, null, true, false); } @Override diff --git a/source/net/sourceforge/filebot/ui/subtitle/SubtitlePanel.java b/source/net/sourceforge/filebot/ui/subtitle/SubtitlePanel.java index 7abb8794..239104b5 100644 --- a/source/net/sourceforge/filebot/ui/subtitle/SubtitlePanel.java +++ b/source/net/sourceforge/filebot/ui/subtitle/SubtitlePanel.java @@ -16,6 +16,7 @@ import java.awt.geom.Path2D; import java.net.URI; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.logging.Level; @@ -36,12 +37,9 @@ import net.sourceforge.filebot.Language; import net.sourceforge.filebot.ResourceManager; import net.sourceforge.filebot.Settings; import net.sourceforge.filebot.WebServices; -import net.sourceforge.filebot.media.MediaDetection; -import net.sourceforge.filebot.similarity.Normalization; import net.sourceforge.filebot.ui.AbstractSearchPanel; import net.sourceforge.filebot.ui.LanguageComboBox; import net.sourceforge.filebot.ui.SelectDialog; -import net.sourceforge.filebot.web.Movie; import net.sourceforge.filebot.web.OpenSubtitlesClient; import net.sourceforge.filebot.web.SearchResult; import net.sourceforge.filebot.web.SubtitleDescriptor; @@ -142,16 +140,7 @@ public class SubtitlePanel extends AbstractSearchPanel getHistory(SubtitleProvider engine) throws Exception { - final List names = new ArrayList(200000); - for (Movie it : MediaDetection.releaseInfo.getMovieList()) { - names.addAll(it.getEffectiveNamesWithoutYear()); - } - for (SearchResult it : MediaDetection.releaseInfo.getTheTVDBIndex()) { - for (String n : it.getEffectiveNames()) { - names.add(Normalization.removeTrailingBrackets(n)); - } - } - return names; + return Collections.emptyList(); }; @Override diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index bd597174..b720e412 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -92,7 +92,8 @@ ^Romance$ ^rtorrent$ ^Science.Fiction$ -^Scratch$ +^scratch$ +^scratch.area$ ^Season$ ^Seeding$ ^Seeds$ @@ -136,6 +137,7 @@ ^Volumes$ ^watch$ ^www$ +^XXX+$ A.PROCESAR A.Release.Lounge ABC