From 40a98b08ae7a217bf7a7dc6c3cfbbb0bcd3568ca Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Sat, 23 Jan 2016 22:25:05 +0000 Subject: [PATCH] * unify GUI/CLI probable match selection --- source/net/filebot/WebServices.java | 5 +- source/net/filebot/cli/CmdlineOperations.java | 34 +---------- source/net/filebot/media/MediaDetection.java | 59 ++++++++++++------- .../filebot/ui/rename/EpisodeListMatcher.java | 2 +- 4 files changed, 44 insertions(+), 56 deletions(-) diff --git a/source/net/filebot/WebServices.java b/source/net/filebot/WebServices.java index 9ab998cc..600af768 100644 --- a/source/net/filebot/WebServices.java +++ b/source/net/filebot/WebServices.java @@ -162,7 +162,8 @@ public final class WebServices { } } } - return sortBySimilarity(results, singleton(query), getSeriesMatchMetric(), false); + + return sortBySimilarity(results, singleton(query), getSeriesMatchMetric()); } } @@ -209,7 +210,7 @@ public final class WebServices { public synchronized List search(final String query) throws Exception { List results = getLocalIndex().search(query); - return sortBySimilarity(results, singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric()), false); + return sortBySimilarity(results, singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric())); } } diff --git a/source/net/filebot/cli/CmdlineOperations.java b/source/net/filebot/cli/CmdlineOperations.java index c2fbefbf..ed0f360d 100644 --- a/source/net/filebot/cli/CmdlineOperations.java +++ b/source/net/filebot/cli/CmdlineOperations.java @@ -55,10 +55,7 @@ import net.filebot.media.XattrMetaInfoProvider; import net.filebot.similarity.CommonSequenceMatcher; import net.filebot.similarity.EpisodeMatcher; import net.filebot.similarity.Match; -import net.filebot.similarity.NameSimilarityMetric; import net.filebot.similarity.SeriesNameMatcher; -import net.filebot.similarity.SimilarityComparator; -import net.filebot.similarity.SimilarityMetric; import net.filebot.subtitle.SubtitleFormat; import net.filebot.subtitle.SubtitleNaming; import net.filebot.util.EntryList; @@ -908,36 +905,9 @@ public class CmdlineOperations implements CmdlineInterface { return output; } - public List findProbableMatches(final String query, Collection searchResults, boolean strict) { - if (query == null) { - return new ArrayList(searchResults); - } - - // auto-select most probable search result - List probableMatches = new ArrayList(); - - // use name similarity metric - SimilarityMetric metric = new NameSimilarityMetric(); - - // find probable matches using name similarity > 0.8 (or > 0.6 in non-strict mode) - for (SearchResult result : searchResults) { - float f = metric.getSimilarity(query, result.getName()); - if (f >= (strict && searchResults.size() > 1 ? 0.8 : 0.6) || ((f >= 0.5 || !strict) && (result.getName().toLowerCase().startsWith(query.toLowerCase())))) { - if (!probableMatches.contains(result)) { - probableMatches.add(result); - } - } - } - - // sort results by similarity to query - sort(probableMatches, new SimilarityComparator(query)); - - return probableMatches; - } - @SuppressWarnings("unchecked") public List selectSearchResult(String query, Collection searchResults, boolean strict) throws Exception { - List probableMatches = findProbableMatches(query, searchResults, strict); + List probableMatches = getProbableMatches(query, searchResults, strict); if (probableMatches.isEmpty() || (strict && probableMatches.size() != 1)) { // allow single search results to just pass through in non-strict mode even if match confidence is low @@ -951,7 +921,7 @@ public class CmdlineOperations implements CmdlineInterface { // just pick the best 5 matches if (query != null) { - probableMatches = (List) sortBySimilarity(searchResults, singleton(query), getSeriesMatchMetric(), false); + probableMatches = (List) sortBySimilarity(searchResults, singleton(query), getSeriesMatchMetric()); } } diff --git a/source/net/filebot/media/MediaDetection.java b/source/net/filebot/media/MediaDetection.java index a8ce7f11..d5eb7bb0 100644 --- a/source/net/filebot/media/MediaDetection.java +++ b/source/net/filebot/media/MediaDetection.java @@ -714,19 +714,9 @@ public class MediaDetection { return new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new SequenceMatchSimilarity(0, true)); } - public static List sortBySimilarity(Collection options, Collection terms, SimilarityMetric metric, boolean stripReleaseInfo) throws IOException { - Collection paragon = new TreeSet(String.CASE_INSENSITIVE_ORDER); - - // clean clutter tokens if required - if (stripReleaseInfo) { - paragon.addAll(stripReleaseInfo(terms, true)); - paragon.addAll(stripReleaseInfo(terms, false)); - } else { - paragon.addAll(terms); - } - + public static List sortBySimilarity(Collection options, Collection terms, SimilarityMetric metric) { // similarity comparator with multi-value support - SimilarityComparator comparator = new SimilarityComparator(metric, paragon.toArray()) { + SimilarityComparator comparator = new SimilarityComparator(metric, terms.toArray()) { @Override public float getMaxSimilarity(Object obj) { @@ -748,6 +738,20 @@ public class MediaDetection { return result; } + public static List sortBySimilarity(Collection options, Collection terms, SimilarityMetric metric, boolean stripReleaseInfo) throws IOException { + Collection paragon = new TreeSet(String.CASE_INSENSITIVE_ORDER); + + // clean clutter tokens if required + if (stripReleaseInfo) { + paragon.addAll(stripReleaseInfo(terms, true)); + paragon.addAll(stripReleaseInfo(terms, false)); + } else { + paragon.addAll(terms); + } + + return sortBySimilarity(options, paragon, metric); + } + public static boolean isEpisodeNumberMatch(File f, Episode e) { float similarity = EpisodeMetrics.EpisodeIdentifier.getSimilarity(f, e); if (similarity >= 1) { @@ -1266,29 +1270,42 @@ public class MediaDetection { return WebServices.TheTVDB.getSeriesInfo(thetvdbid, locale); } - public static List getProbableMatches(String query, Collection options) { + public static List getProbableMatches(String query, Collection options, boolean strict) { + if (query == null) { + return new ArrayList(options); + } + // auto-select most probable search result - List probableMatches = new LinkedList(); + List probableMatches = new ArrayList(); // use name similarity metric SimilarityMetric metric = new NameSimilarityMetric(); - float threshold = 0.85f; + float threshold = strict && options.size() > 1 ? 0.8f : 0.6f; + float sanity = strict && options.size() > 1 ? 0.5f : 0.2f; - // remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who - query = removeTrailingBrackets(query); + // remove trailing braces, e.g. Doctor Who (2005) -> doctor who + query = removeTrailingBrackets(query).toLowerCase(); - // find probable matches using name similarity >= 0.85 + // find probable matches using name similarity > 0.8 (or > 0.6 in non-strict mode) for (SearchResult option : options) { float f = 0; for (String n : option.getEffectiveNames()) { - f = Math.max(f, metric.getSimilarity(query, removeTrailingBrackets(n))); + n = removeTrailingBrackets(n).toLowerCase(); + f = Math.max(f, metric.getSimilarity(query, n)); + + // boost matching beginnings + if (f >= sanity && n.startsWith(query)) { + f = 1; + break; + } } - if (f >= threshold) { + + if (f >= threshold && !probableMatches.contains(option)) { probableMatches.add(option); } } - return probableMatches; + return sortBySimilarity(probableMatches, singleton(query), new NameSimilarityMetric()); } public static class IndexEntry implements Serializable { diff --git a/source/net/filebot/ui/rename/EpisodeListMatcher.java b/source/net/filebot/ui/rename/EpisodeListMatcher.java index bb689ff5..6893957f 100644 --- a/source/net/filebot/ui/rename/EpisodeListMatcher.java +++ b/source/net/filebot/ui/rename/EpisodeListMatcher.java @@ -70,7 +70,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher { } // auto-select most probable search result - List probableMatches = getProbableMatches(query, searchResults); + List probableMatches = getProbableMatches(query, searchResults, true); // auto-select first and only probable search result if (probableMatches.size() == 1) {