* major performance improvements for mass movie matching through optimized data caching

* caching of requests didn't work in parallel searches because threads would always hit similar files (in order) at the same time and therefore couldn't benefit from caching. The new logic uses one thread per folder, since all files that result in the same query are usually in the same folder.
This commit is contained in:
Reinhard Pointner 2012-07-24 20:01:48 +00:00
parent d33a907f2f
commit d143e3feb5
6 changed files with 75 additions and 56 deletions

View File

@ -31,7 +31,6 @@ import java.util.Map.Entry;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.WeakHashMap;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -427,7 +426,6 @@ public class MediaDetection {
return null; return null;
} }
private static List<Entry<String, Movie>> movieIndex; private static List<Entry<String, Movie>> movieIndex;
@ -531,7 +529,7 @@ public class MediaDetection {
final SimilarityMetric metric = new NameSimilarityMetric(); final SimilarityMetric metric = new NameSimilarityMetric();
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>(); final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
for (String query : querySet) { for (String query : querySet) {
for (Movie movie : queryLookupService.searchMovie(query, locale)) { for (Movie movie : queryLookupService.searchMovie(query.toLowerCase(), locale)) {
probabilityMap.put(movie, metric.getSimilarity(query, movie)); probabilityMap.put(movie, metric.getSimilarity(query, movie));
} }
} }
@ -660,7 +658,7 @@ public class MediaDetection {
private static final Collator collator = getLenientCollator(Locale.ENGLISH); private static final Collator collator = getLenientCollator(Locale.ENGLISH);
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new WeakHashMap<String, CollationKey[]>(65536)); private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new HashMap<String, CollationKey[]>(65536));
public HighPerformanceMatcher(int maxStartIndex) { public HighPerformanceMatcher(int maxStartIndex) {

View File

@ -29,7 +29,6 @@ import java.util.Map;
import java.util.Scanner; import java.util.Scanner;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.WeakHashMap;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
@ -102,21 +101,46 @@ public class ReleaseInfo {
return lastMatch; return lastMatch;
} }
// cached patterns
private Pattern[] strict_stopwords;
private Pattern[] strict_blacklist;
private Pattern[] nonstrict_stopwords;
private Pattern[] nonstrict_blacklist;
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException { public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet(); Pattern[] stopwords;
Pattern[] blacklist;
Pattern clutterBracket = getClutterBracketPattern(strict); // initialize cached patterns
Pattern releaseGroup = getReleaseGroupPattern(strict); synchronized (this) {
Pattern languageSuffix = getLanguageSuffixPattern(languages); stopwords = strict ? strict_stopwords : nonstrict_stopwords;
Pattern languageTag = getLanguageTagPattern(languages); blacklist = strict ? strict_blacklist : nonstrict_blacklist;
Pattern videoSource = getVideoSourcePattern();
Pattern videoFormat = getVideoFormatPattern(); if (stopwords == null || blacklist == null) {
Pattern resolution = getResolutionPattern(); Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
Pattern queryBlacklist = getBlacklistPattern(); Pattern clutterBracket = getClutterBracketPattern(strict);
Pattern releaseGroup = getReleaseGroupPattern(strict);
Pattern[] stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix }; Pattern languageSuffix = getLanguageSuffixPattern(languages);
Pattern[] blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist }; Pattern languageTag = getLanguageTagPattern(languages);
Pattern videoSource = getVideoSourcePattern();
Pattern videoFormat = getVideoFormatPattern();
Pattern resolution = getResolutionPattern();
Pattern queryBlacklist = getBlacklistPattern();
stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix };
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
// cache compiled patterns for common usage
if (strict) {
strict_stopwords = stopwords;
strict_blacklist = blacklist;
} else {
nonstrict_stopwords = stopwords;
nonstrict_blacklist = blacklist;
}
}
}
List<String> output = new ArrayList<String>(items.size()); List<String> output = new ArrayList<String>(items.size());
for (String it : items) { for (String it : items) {
@ -334,17 +358,8 @@ public class ReleaseInfo {
return patterns; return patterns;
} }
private final Map<Set<Locale>, Map<String, Locale>> languageMapCache = synchronizedMap(new WeakHashMap<Set<Locale>, Map<String, Locale>>(2));
private Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) { private Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) {
// try cache
Set<Locale> displayLocales = new HashSet<Locale>(asList(supportedDisplayLocale));
Map<String, Locale> languageMap = languageMapCache.get(displayLocales);
if (languageMap != null) {
return languageMap;
}
// use maximum strength collator by default // use maximum strength collator by default
Collator collator = Collator.getInstance(Locale.ROOT); Collator collator = Collator.getInstance(Locale.ROOT);
collator.setDecomposition(Collator.FULL_DECOMPOSITION); collator.setDecomposition(Collator.FULL_DECOMPOSITION);
@ -352,7 +367,7 @@ public class ReleaseInfo {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
Comparator<String> order = (Comparator) collator; Comparator<String> order = (Comparator) collator;
languageMap = new TreeMap<String, Locale>(order); Map<String, Locale> languageMap = languageMap = new TreeMap<String, Locale>(order);
for (String code : Locale.getISOLanguages()) { for (String code : Locale.getISOLanguages()) {
Locale locale = new Locale(code); Locale locale = new Locale(code);
@ -360,7 +375,7 @@ public class ReleaseInfo {
languageMap.put(locale.getISO3Language(), locale); languageMap.put(locale.getISO3Language(), locale);
// map display language names for given locales // map display language names for given locales
for (Locale language : displayLocales) { for (Locale language : new HashSet<Locale>(asList(supportedDisplayLocale))) {
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax // make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD); String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
languageMap.put(languageName, locale); languageMap.put(languageName, locale);
@ -373,7 +388,6 @@ public class ReleaseInfo {
languageMap.remove("III"); languageMap.remove("III");
Map<String, Locale> result = unmodifiableMap(languageMap); Map<String, Locale> result = unmodifiableMap(languageMap);
languageMapCache.put(displayLocales, result);
return result; return result;
} }
} }

View File

@ -7,9 +7,9 @@ import static java.util.Collections.*;
import java.text.CollationKey; import java.text.CollationKey;
import java.text.Collator; import java.text.Collator;
import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.WeakHashMap;
public class CommonSequenceMatcher { public class CommonSequenceMatcher {
@ -22,7 +22,6 @@ public class CommonSequenceMatcher {
return collator; return collator;
} }
protected final Collator collator; protected final Collator collator;
protected final int commonSequenceMaxStartIndex; protected final int commonSequenceMaxStartIndex;
@ -81,8 +80,7 @@ public class CommonSequenceMatcher {
return getCollationKeys(sequence.split("\\s+")); return getCollationKeys(sequence.split("\\s+"));
} }
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new HashMap<String, CollationKey>(256));
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new WeakHashMap<String, CollationKey>(256));
protected CollationKey[] getCollationKeys(String[] words) { protected CollationKey[] getCollationKeys(String[] words) {

View File

@ -7,13 +7,13 @@ import static java.util.Collections.*;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.IdentityHashMap; import java.util.IdentityHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Set; import java.util.Set;
import java.util.WeakHashMap;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE; import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Episode;
@ -77,9 +77,8 @@ public class EpisodeMatcher extends Matcher<File, Object> {
} }
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true); private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new WeakHashMap<File, Set<SxE>>(64, 4)); private final Map<File, Set<SxE>> transformCache = synchronizedMap(new HashMap<File, Set<SxE>>(64, 4));
private Set<SxE> parseEpisodeIdentifer(File file) { private Set<SxE> parseEpisodeIdentifer(File file) {

View File

@ -10,8 +10,8 @@ import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File; import java.io.File;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.WeakHashMap;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE; import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.vfs.FileInfo; import net.sourceforge.filebot.vfs.FileInfo;
@ -26,7 +26,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
// Match by season / episode numbers // Match by season / episode numbers
SeasonEpisode(new SeasonEpisodeMetric() { SeasonEpisode(new SeasonEpisodeMetric() {
private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new WeakHashMap<Object, Collection<SxE>>(64, 4)); private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new HashMap<Object, Collection<SxE>>(64, 4));
@Override @Override
@ -64,7 +64,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
// Match episode airdate // Match episode airdate
AirDate(new DateMetric() { AirDate(new DateMetric() {
private final Map<Object, Date> transformCache = synchronizedMap(new WeakHashMap<Object, Date>(64, 4)); private final Map<Object, Date> transformCache = synchronizedMap(new HashMap<Object, Date>(64, 4));
@Override @Override
@ -118,8 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
EpisodeIdentifier(new MetricCascade(SeasonEpisode, AirDate)), EpisodeIdentifier(new MetricCascade(SeasonEpisode, AirDate)),
// Advanced episode <-> file matching // Advanced episode <-> file matching
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)), EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)), EpisodeBalancer(new SimilarityMetric() {
EpisodeBalancer(new SimilarityMetric() {
@Override @Override
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
@ -301,7 +300,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
return metric.getSimilarity(o1, o2); return metric.getSimilarity(o1, o2);
} }
private static final Map<Object, String> transformCache = synchronizedMap(new WeakHashMap<Object, String>(64, 4)); private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
protected static String normalizeObject(Object object) { protected static String normalizeObject(Object object) {

View File

@ -15,11 +15,11 @@ import static net.sourceforge.tuned.ui.TunedUtilities.*;
import java.awt.Component; import java.awt.Component;
import java.awt.Dimension; import java.awt.Dimension;
import java.io.File; import java.io.File;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
@ -144,21 +144,30 @@ class MovieHashMatcher implements AutoCompleteMatcher {
movieMatchFiles.addAll(filter(orphanedFiles, SUBTITLE_FILES)); // run movie detection only on orphaned subtitle files movieMatchFiles.addAll(filter(orphanedFiles, SUBTITLE_FILES)); // run movie detection only on orphaned subtitle files
// match remaining movies file by file in parallel // match remaining movies file by file in parallel
List<Future<Entry<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Entry<File, Collection<Movie>>>>(); List<Future<Map<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Map<File, Collection<Movie>>>>();
// process in parallel // process in parallel
ExecutorService executor = Executors.newFixedThreadPool(getPreferredThreadPoolSize()); ExecutorService executor = Executors.newFixedThreadPool(getPreferredThreadPoolSize());
// map all files by movie // map all files by movie
for (final File file : movieMatchFiles) { List<File> remainingFiles = new ArrayList<File>();
if (movieByFile.containsKey(file))
continue; for (File file : movieMatchFiles) {
if (!movieByFile.containsKey(file)) {
grabMovieJobs.add(executor.submit(new Callable<Entry<File, Collection<Movie>>>() { remainingFiles.add(file);
}
}
for (final Collection<File> folder : mapByFolder(remainingFiles).values()) {
grabMovieJobs.add(executor.submit(new Callable<Map<File, Collection<Movie>>>() {
@Override @Override
public SimpleEntry<File, Collection<Movie>> call() throws Exception { public Map<File, Collection<Movie>> call() throws Exception {
return new SimpleEntry<File, Collection<Movie>>(file, detectMovie(file, null, service, locale, false)); Map<File, Collection<Movie>> detection = new LinkedHashMap<File, Collection<Movie>>();
for (File f : folder) {
detection.put(f, detectMovie(f, null, service, locale, false));
}
return detection;
} }
})); }));
} }
@ -169,12 +178,14 @@ class MovieHashMatcher implements AutoCompleteMatcher {
memory.put("selection", new TreeMap<String, String>(getLenientCollator(locale))); memory.put("selection", new TreeMap<String, String>(getLenientCollator(locale)));
try { try {
for (Future<Entry<File, Collection<Movie>>> it : grabMovieJobs) { for (Future<Map<File, Collection<Movie>>> detection : grabMovieJobs) {
// auto-select movie or ask user // auto-select movie or ask user
File movieFile = it.get().getKey(); for (Entry<File, Collection<Movie>> it : detection.get().entrySet()) {
Movie movie = grabMovieName(movieFile, it.get().getValue(), locale, autodetect, memory, parent); File movieFile = it.getKey();
if (movie != null) { Movie movie = grabMovieName(movieFile, it.getValue(), locale, autodetect, memory, parent);
movieByFile.put(movieFile, movie); if (movie != null) {
movieByFile.put(movieFile, movie);
}
} }
} }
} finally { } finally {