mirror of
https://github.com/mitb-archive/filebot
synced 2024-11-02 00:15:02 -04:00
Refactor HighPerformanceMatcher into separate class files
This commit is contained in:
parent
0472084dc7
commit
6602af3f91
73
source/net/filebot/media/HighPerformanceMatcher.java
Normal file
73
source/net/filebot/media/HighPerformanceMatcher.java
Normal file
@ -0,0 +1,73 @@
|
||||
package net.filebot.media;
|
||||
|
||||
import static java.util.stream.Collectors.*;
|
||||
import static net.filebot.similarity.Normalization.*;
|
||||
import static net.filebot.util.RegularExpressions.*;
|
||||
|
||||
import java.text.CollationKey;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
|
||||
import net.filebot.similarity.CommonSequenceMatcher;
|
||||
import net.filebot.web.Movie;
|
||||
import net.filebot.web.SearchResult;
|
||||
|
||||
/**
|
||||
* Fast name matcher used for matching a file to one or more movies (out of a list of ~50k in milliseconds)
|
||||
*/
|
||||
class HighPerformanceMatcher extends CommonSequenceMatcher {
|
||||
|
||||
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
|
||||
|
||||
public static CollationKey[] prepare(String sequence) {
|
||||
String[] words = SPACE.split(sequence);
|
||||
CollationKey[] keys = new CollationKey[words.length];
|
||||
for (int i = 0; i < words.length; i++) {
|
||||
keys[i] = collator.getCollationKey(words[i]);
|
||||
}
|
||||
return keys;
|
||||
}
|
||||
|
||||
public static List<CollationKey[]> prepare(Collection<String> sequences) {
|
||||
return sequences.stream().filter(Objects::nonNull).map(s -> {
|
||||
return prepare(normalizePunctuation(s));
|
||||
}).collect(toList());
|
||||
}
|
||||
|
||||
public static List<IndexEntry<Movie>> prepare(Movie m) {
|
||||
List<String> effectiveNamesWithoutYear = m.getEffectiveNamesWithoutYear();
|
||||
List<String> effectiveNames = m.getEffectiveNames();
|
||||
List<IndexEntry<Movie>> index = new ArrayList<IndexEntry<Movie>>(effectiveNames.size());
|
||||
|
||||
for (int i = 0; i < effectiveNames.size(); i++) {
|
||||
String lenientName = normalizePunctuation(effectiveNamesWithoutYear.get(i));
|
||||
String strictName = normalizePunctuation(effectiveNames.get(i));
|
||||
index.add(new IndexEntry<Movie>(m, lenientName, strictName));
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
public static List<IndexEntry<SearchResult>> prepare(SearchResult r) {
|
||||
List<String> effectiveNames = r.getEffectiveNames();
|
||||
List<IndexEntry<SearchResult>> index = new ArrayList<IndexEntry<SearchResult>>(effectiveNames.size());
|
||||
|
||||
for (int i = 0; i < effectiveNames.size(); i++) {
|
||||
String lenientName = normalizePunctuation(effectiveNames.get(i));
|
||||
index.add(new IndexEntry<SearchResult>(r, lenientName, null));
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
public HighPerformanceMatcher(int maxStartIndex) {
|
||||
super(collator, maxStartIndex, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CollationKey[] split(String sequence) {
|
||||
throw new UnsupportedOperationException("requires ahead-of-time collation");
|
||||
}
|
||||
}
|
52
source/net/filebot/media/IndexEntry.java
Normal file
52
source/net/filebot/media/IndexEntry.java
Normal file
@ -0,0 +1,52 @@
|
||||
package net.filebot.media;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.text.CollationKey;
|
||||
|
||||
class IndexEntry<T> implements Serializable {
|
||||
|
||||
private T object;
|
||||
private String lenientName;
|
||||
private String strictName;
|
||||
|
||||
private transient CollationKey[] lenientKey;
|
||||
private transient CollationKey[] strictKey;
|
||||
|
||||
public IndexEntry(T object, String lenientName, String strictName) {
|
||||
this.object = object;
|
||||
this.lenientName = lenientName;
|
||||
this.strictName = strictName;
|
||||
}
|
||||
|
||||
public T getObject() {
|
||||
return object;
|
||||
}
|
||||
|
||||
public String getLenientName() {
|
||||
return lenientName;
|
||||
}
|
||||
|
||||
public String getStrictName() {
|
||||
return strictName;
|
||||
}
|
||||
|
||||
public CollationKey[] getLenientKey() {
|
||||
if (lenientKey == null && lenientName != null) {
|
||||
lenientKey = HighPerformanceMatcher.prepare(lenientName);
|
||||
}
|
||||
return lenientKey;
|
||||
}
|
||||
|
||||
public CollationKey[] getStrictKey() {
|
||||
if (strictKey == null && strictName != null) {
|
||||
strictKey = HighPerformanceMatcher.prepare(strictName);
|
||||
}
|
||||
return strictKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return strictName != null ? strictName : lenientName;
|
||||
}
|
||||
|
||||
}
|
@ -11,15 +11,12 @@ import static net.filebot.similarity.CommonSequenceMatcher.*;
|
||||
import static net.filebot.similarity.Normalization.*;
|
||||
import static net.filebot.subtitle.SubtitleUtilities.*;
|
||||
import static net.filebot.util.FileUtilities.*;
|
||||
import static net.filebot.util.RegularExpressions.*;
|
||||
import static net.filebot.util.StringUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.text.CollationKey;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@ -50,7 +47,6 @@ import net.filebot.WebServices;
|
||||
import net.filebot.archive.Archive;
|
||||
import net.filebot.mediainfo.MediaInfo;
|
||||
import net.filebot.mediainfo.MediaInfo.StreamKind;
|
||||
import net.filebot.similarity.CommonSequenceMatcher;
|
||||
import net.filebot.similarity.DateMatcher;
|
||||
import net.filebot.similarity.EpisodeMetrics;
|
||||
import net.filebot.similarity.MetricAvg;
|
||||
@ -554,7 +550,7 @@ public class MediaDetection {
|
||||
for (String term : terms) {
|
||||
if (term.contains(name)) {
|
||||
if (metric.getSimilarity(term, name) >= similarityThreshold) {
|
||||
seriesList.add(it.object);
|
||||
seriesList.add(it.getObject());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1327,109 +1323,6 @@ public class MediaDetection {
|
||||
return sortBySimilarity(probableMatches, singleton(query), new NameSimilarityMetric(), names);
|
||||
}
|
||||
|
||||
public static class IndexEntry<T> implements Serializable {
|
||||
|
||||
private T object;
|
||||
private String lenientName;
|
||||
private String strictName;
|
||||
|
||||
private transient CollationKey[] lenientKey;
|
||||
private transient CollationKey[] strictKey;
|
||||
|
||||
public IndexEntry(T object, String lenientName, String strictName) {
|
||||
this.object = object;
|
||||
this.lenientName = lenientName;
|
||||
this.strictName = strictName;
|
||||
}
|
||||
|
||||
public T getObject() {
|
||||
return object;
|
||||
}
|
||||
|
||||
public String getLenientName() {
|
||||
return lenientName;
|
||||
}
|
||||
|
||||
public String getStrictName() {
|
||||
return strictName;
|
||||
}
|
||||
|
||||
public CollationKey[] getLenientKey() {
|
||||
if (lenientKey == null && lenientName != null) {
|
||||
lenientKey = HighPerformanceMatcher.prepare(lenientName);
|
||||
}
|
||||
return lenientKey;
|
||||
}
|
||||
|
||||
public CollationKey[] getStrictKey() {
|
||||
if (strictKey == null && strictName != null) {
|
||||
strictKey = HighPerformanceMatcher.prepare(strictName);
|
||||
}
|
||||
return strictKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return strictName != null ? strictName : lenientName;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Heavy-duty name matcher used for matching a file to one or more movies (out of a list of ~50k)
|
||||
*/
|
||||
private static class HighPerformanceMatcher extends CommonSequenceMatcher {
|
||||
|
||||
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
|
||||
|
||||
public static CollationKey[] prepare(String sequence) {
|
||||
String[] words = SPACE.split(sequence);
|
||||
CollationKey[] keys = new CollationKey[words.length];
|
||||
for (int i = 0; i < words.length; i++) {
|
||||
keys[i] = collator.getCollationKey(words[i]);
|
||||
}
|
||||
return keys;
|
||||
}
|
||||
|
||||
public static List<CollationKey[]> prepare(Collection<String> sequences) {
|
||||
return sequences.stream().filter(Objects::nonNull).map(s -> {
|
||||
return prepare(normalizePunctuation(s));
|
||||
}).collect(toList());
|
||||
}
|
||||
|
||||
public static List<IndexEntry<Movie>> prepare(Movie m) {
|
||||
List<String> effectiveNamesWithoutYear = m.getEffectiveNamesWithoutYear();
|
||||
List<String> effectiveNames = m.getEffectiveNames();
|
||||
List<IndexEntry<Movie>> index = new ArrayList<IndexEntry<Movie>>(effectiveNames.size());
|
||||
|
||||
for (int i = 0; i < effectiveNames.size(); i++) {
|
||||
String lenientName = normalizePunctuation(effectiveNamesWithoutYear.get(i));
|
||||
String strictName = normalizePunctuation(effectiveNames.get(i));
|
||||
index.add(new IndexEntry<Movie>(m, lenientName, strictName));
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
public static List<IndexEntry<SearchResult>> prepare(SearchResult r) {
|
||||
List<String> effectiveNames = r.getEffectiveNames();
|
||||
List<IndexEntry<SearchResult>> index = new ArrayList<IndexEntry<SearchResult>>(effectiveNames.size());
|
||||
|
||||
for (int i = 0; i < effectiveNames.size(); i++) {
|
||||
String lenientName = normalizePunctuation(effectiveNames.get(i));
|
||||
index.add(new IndexEntry<SearchResult>(r, lenientName, null));
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
public HighPerformanceMatcher(int maxStartIndex) {
|
||||
super(collator, maxStartIndex, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CollationKey[] split(String sequence) {
|
||||
throw new UnsupportedOperationException("requires ahead-of-time collation");
|
||||
}
|
||||
}
|
||||
|
||||
public static void warmupCachedResources() throws Exception {
|
||||
// load filter data
|
||||
MediaDetection.getClutterFileFilter();
|
||||
|
Loading…
Reference in New Issue
Block a user