Extract IndexEntry and HighPerformanceMatcher from MediaDetection into
their own package-private source files in package net.filebot.media.

HighPerformanceMatcher.java and IndexEntry.java are new files that carry
the same members as the nested classes removed from MediaDetection.java.
MediaDetection.java drops the two nested classes and the four imports
listed in its hunks, and reads the indexed object via it.getObject()
instead of accessing the field it.object directly.

NOTE(review): generic type arguments and tab indentation in this patch
were reconstructed from a flattened copy of the diff; verify them against
the repository before applying (or apply with whitespace ignored).

diff --git a/source/net/filebot/media/HighPerformanceMatcher.java b/source/net/filebot/media/HighPerformanceMatcher.java
new file mode 100644
index 00000000..f56ba997
--- /dev/null
+++ b/source/net/filebot/media/HighPerformanceMatcher.java
@@ -0,0 +1,73 @@
+package net.filebot.media;
+
+import static java.util.stream.Collectors.*;
+import static net.filebot.similarity.Normalization.*;
+import static net.filebot.util.RegularExpressions.*;
+
+import java.text.CollationKey;
+import java.text.Collator;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+
+import net.filebot.similarity.CommonSequenceMatcher;
+import net.filebot.web.Movie;
+import net.filebot.web.SearchResult;
+
+/**
+ * Fast name matcher used for matching a file to one or more movies (out of a list of ~50k in milliseconds)
+ */
+class HighPerformanceMatcher extends CommonSequenceMatcher {
+
+	private static final Collator collator = getLenientCollator(Locale.ENGLISH);
+
+	public static CollationKey[] prepare(String sequence) {
+		String[] words = SPACE.split(sequence);
+		CollationKey[] keys = new CollationKey[words.length];
+		for (int i = 0; i < words.length; i++) {
+			keys[i] = collator.getCollationKey(words[i]);
+		}
+		return keys;
+	}
+
+	public static List<CollationKey[]> prepare(Collection<String> sequences) {
+		return sequences.stream().filter(Objects::nonNull).map(s -> {
+			return prepare(normalizePunctuation(s));
+		}).collect(toList());
+	}
+
+	public static List<IndexEntry<Movie>> prepare(Movie m) {
+		List<String> effectiveNamesWithoutYear = m.getEffectiveNamesWithoutYear();
+		List<String> effectiveNames = m.getEffectiveNames();
+		List<IndexEntry<Movie>> index = new ArrayList<IndexEntry<Movie>>(effectiveNames.size());
+
+		for (int i = 0; i < effectiveNames.size(); i++) {
+			String lenientName = normalizePunctuation(effectiveNamesWithoutYear.get(i));
+			String strictName = normalizePunctuation(effectiveNames.get(i));
+			index.add(new IndexEntry<Movie>(m, lenientName, strictName));
+		}
+		return index;
+	}
+
+	public static List<IndexEntry<SearchResult>> prepare(SearchResult r) {
+		List<String> effectiveNames = r.getEffectiveNames();
+		List<IndexEntry<SearchResult>> index = new ArrayList<IndexEntry<SearchResult>>(effectiveNames.size());
+
+		for (int i = 0; i < effectiveNames.size(); i++) {
+			String lenientName = normalizePunctuation(effectiveNames.get(i));
+			index.add(new IndexEntry<SearchResult>(r, lenientName, null));
+		}
+		return index;
+	}
+
+	public HighPerformanceMatcher(int maxStartIndex) {
+		super(collator, maxStartIndex, true);
+	}
+
+	@Override
+	public CollationKey[] split(String sequence) {
+		throw new UnsupportedOperationException("requires ahead-of-time collation");
+	}
+}
\ No newline at end of file
diff --git a/source/net/filebot/media/IndexEntry.java b/source/net/filebot/media/IndexEntry.java
new file mode 100644
index 00000000..2a65ef5d
--- /dev/null
+++ b/source/net/filebot/media/IndexEntry.java
@@ -0,0 +1,52 @@
+package net.filebot.media;
+
+import java.io.Serializable;
+import java.text.CollationKey;
+
+class IndexEntry<T> implements Serializable {
+
+	private T object;
+	private String lenientName;
+	private String strictName;
+
+	private transient CollationKey[] lenientKey;
+	private transient CollationKey[] strictKey;
+
+	public IndexEntry(T object, String lenientName, String strictName) {
+		this.object = object;
+		this.lenientName = lenientName;
+		this.strictName = strictName;
+	}
+
+	public T getObject() {
+		return object;
+	}
+
+	public String getLenientName() {
+		return lenientName;
+	}
+
+	public String getStrictName() {
+		return strictName;
+	}
+
+	public CollationKey[] getLenientKey() {
+		if (lenientKey == null && lenientName != null) {
+			lenientKey = HighPerformanceMatcher.prepare(lenientName);
+		}
+		return lenientKey;
+	}
+
+	public CollationKey[] getStrictKey() {
+		if (strictKey == null && strictName != null) {
+			strictKey = HighPerformanceMatcher.prepare(strictName);
+		}
+		return strictKey;
+	}
+
+	@Override
+	public String toString() {
+		return strictName != null ? strictName : lenientName;
+	}
+
+}
diff --git a/source/net/filebot/media/MediaDetection.java b/source/net/filebot/media/MediaDetection.java
index d3076b39..5285a2fd 100644
--- a/source/net/filebot/media/MediaDetection.java
+++ b/source/net/filebot/media/MediaDetection.java
@@ -11,15 +11,12 @@ import static net.filebot.similarity.CommonSequenceMatcher.*;
 import static net.filebot.similarity.Normalization.*;
 import static net.filebot.subtitle.SubtitleUtilities.*;
 import static net.filebot.util.FileUtilities.*;
-import static net.filebot.util.RegularExpressions.*;
 import static net.filebot.util.StringUtilities.*;
 
 import java.io.File;
 import java.io.FileFilter;
 import java.io.IOException;
-import java.io.Serializable;
 import java.text.CollationKey;
-import java.text.Collator;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -50,7 +47,6 @@ import net.filebot.WebServices;
 import net.filebot.archive.Archive;
 import net.filebot.mediainfo.MediaInfo;
 import net.filebot.mediainfo.MediaInfo.StreamKind;
-import net.filebot.similarity.CommonSequenceMatcher;
 import net.filebot.similarity.DateMatcher;
 import net.filebot.similarity.EpisodeMetrics;
 import net.filebot.similarity.MetricAvg;
@@ -554,7 +550,7 @@ public class MediaDetection {
 			for (String term : terms) {
 				if (term.contains(name)) {
 					if (metric.getSimilarity(term, name) >= similarityThreshold) {
-						seriesList.add(it.object);
+						seriesList.add(it.getObject());
 					}
 					break;
 				}
@@ -1327,109 +1323,6 @@ public class MediaDetection {
 		return sortBySimilarity(probableMatches, singleton(query), new NameSimilarityMetric(), names);
 	}
 
-	public static class IndexEntry<T> implements Serializable {
-
-		private T object;
-		private String lenientName;
-		private String strictName;
-
-		private transient CollationKey[] lenientKey;
-		private transient CollationKey[] strictKey;
-
-		public IndexEntry(T object, String lenientName, String strictName) {
-			this.object = object;
-			this.lenientName = lenientName;
-			this.strictName = strictName;
-		}
-
-		public T getObject() {
-			return object;
-		}
-
-		public String getLenientName() {
-			return lenientName;
-		}
-
-		public String getStrictName() {
-			return strictName;
-		}
-
-		public CollationKey[] getLenientKey() {
-			if (lenientKey == null && lenientName != null) {
-				lenientKey = HighPerformanceMatcher.prepare(lenientName);
-			}
-			return lenientKey;
-		}
-
-		public CollationKey[] getStrictKey() {
-			if (strictKey == null && strictName != null) {
-				strictKey = HighPerformanceMatcher.prepare(strictName);
-			}
-			return strictKey;
-		}
-
-		@Override
-		public String toString() {
-			return strictName != null ? strictName : lenientName;
-		}
-	}
-
-	/*
-	 * Heavy-duty name matcher used for matching a file to or more movies (out of a list of ~50k)
-	 */
-	private static class HighPerformanceMatcher extends CommonSequenceMatcher {
-
-		private static final Collator collator = getLenientCollator(Locale.ENGLISH);
-
-		public static CollationKey[] prepare(String sequence) {
-			String[] words = SPACE.split(sequence);
-			CollationKey[] keys = new CollationKey[words.length];
-			for (int i = 0; i < words.length; i++) {
-				keys[i] = collator.getCollationKey(words[i]);
-			}
-			return keys;
-		}
-
-		public static List<CollationKey[]> prepare(Collection<String> sequences) {
-			return sequences.stream().filter(Objects::nonNull).map(s -> {
-				return prepare(normalizePunctuation(s));
-			}).collect(toList());
-		}
-
-		public static List<IndexEntry<Movie>> prepare(Movie m) {
-			List<String> effectiveNamesWithoutYear = m.getEffectiveNamesWithoutYear();
-			List<String> effectiveNames = m.getEffectiveNames();
-			List<IndexEntry<Movie>> index = new ArrayList<IndexEntry<Movie>>(effectiveNames.size());
-
-			for (int i = 0; i < effectiveNames.size(); i++) {
-				String lenientName = normalizePunctuation(effectiveNamesWithoutYear.get(i));
-				String strictName = normalizePunctuation(effectiveNames.get(i));
-				index.add(new IndexEntry<Movie>(m, lenientName, strictName));
-			}
-			return index;
-		}
-
-		public static List<IndexEntry<SearchResult>> prepare(SearchResult r) {
-			List<String> effectiveNames = r.getEffectiveNames();
-			List<IndexEntry<SearchResult>> index = new ArrayList<IndexEntry<SearchResult>>(effectiveNames.size());
-
-			for (int i = 0; i < effectiveNames.size(); i++) {
-				String lenientName = normalizePunctuation(effectiveNames.get(i));
-				index.add(new IndexEntry<SearchResult>(r, lenientName, null));
-			}
-			return index;
-		}
-
-		public HighPerformanceMatcher(int maxStartIndex) {
-			super(collator, maxStartIndex, true);
-		}
-
-		@Override
-		public CollationKey[] split(String sequence) {
-			throw new UnsupportedOperationException("requires ahead-of-time collation");
-		}
-	}
-
 	public static void warmupCachedResources() throws Exception {
 		// load filter data
 		MediaDetection.getClutterFileFilter();