mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-10 21:38:04 -05:00
Refactor LocalSearch and AutoDetection worker threads
This commit is contained in:
parent
3bd1655136
commit
d73934f09b
@ -11,7 +11,6 @@ import static net.filebot.util.FileUtilities.*;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
@ -69,7 +68,7 @@ public final class WebServices {
|
||||
public static final ID3Lookup MediaInfoID3 = new ID3Lookup();
|
||||
|
||||
public static EpisodeListProvider[] getEpisodeListProviders() {
|
||||
return new EpisodeListProvider[] { TheTVDB, TheMovieDB_TV, AniDB, TVmaze };
|
||||
return new EpisodeListProvider[] { TheTVDB, AniDB, TheMovieDB_TV, TVmaze };
|
||||
}
|
||||
|
||||
public static MovieIdentificationService[] getMovieIdentificationServices() {
|
||||
@ -108,6 +107,7 @@ public final class WebServices {
|
||||
}
|
||||
|
||||
public static final ExecutorService requestThreadPool = Executors.newCachedThreadPool();
|
||||
public static final ExecutorService workerThreadPool = Executors.newWorkStealingPool(getPreferredThreadPoolSize());
|
||||
|
||||
public static class TheTVDBClientWithLocalSearch extends TheTVDBClient {
|
||||
|
||||
@ -115,29 +115,10 @@ public final class WebServices {
|
||||
super(apikey);
|
||||
}
|
||||
|
||||
// index of local thetvdb data dump
|
||||
private static LocalSearch<SearchResult> localIndex;
|
||||
|
||||
public synchronized LocalSearch<SearchResult> getLocalIndex() throws Exception {
|
||||
if (localIndex == null) {
|
||||
// fetch data dump
|
||||
SearchResult[] data = releaseInfo.getTheTVDBIndex();
|
||||
|
||||
// index data dump
|
||||
localIndex = new LocalSearch<SearchResult>(asList(data)) {
|
||||
|
||||
@Override
|
||||
protected Set<String> getFields(SearchResult object) {
|
||||
return set(object.getEffectiveNames());
|
||||
}
|
||||
};
|
||||
|
||||
// make local search more restrictive
|
||||
localIndex.setResultMinimumSimilarity(0.7f);
|
||||
}
|
||||
|
||||
return localIndex;
|
||||
}
|
||||
// local TheTVDB search index
|
||||
private final Resource<LocalSearch<SearchResult>> localIndex = Resource.lazy(() -> {
|
||||
return new LocalSearch<SearchResult>(releaseInfo.getTheTVDBIndex(), SearchResult::getEffectiveNames);
|
||||
}).memoize();
|
||||
|
||||
private SearchResult merge(SearchResult prime, List<SearchResult> group) {
|
||||
int id = prime.getId();
|
||||
@ -150,7 +131,7 @@ public final class WebServices {
|
||||
public List<SearchResult> fetchSearchResult(final String query, final Locale locale) throws Exception {
|
||||
// run local search and API search in parallel
|
||||
Future<List<SearchResult>> apiSearch = requestThreadPool.submit(() -> TheTVDBClientWithLocalSearch.super.fetchSearchResult(query, locale));
|
||||
Future<List<SearchResult>> localSearch = requestThreadPool.submit(() -> getLocalIndex().search(query));
|
||||
Future<List<SearchResult>> localSearch = requestThreadPool.submit(() -> localIndex.get().search(query));
|
||||
|
||||
// combine alias names into a single search results, and keep API search name as primary name
|
||||
Collection<SearchResult> result = StreamEx.of(apiSearch.get()).append(localSearch.get()).groupingBy(SearchResult::getId, collectingAndThen(toList(), group -> merge(group.get(0), group))).values();
|
||||
@ -166,8 +147,8 @@ public final class WebServices {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<SearchResult> getAnimeTitles() throws Exception {
|
||||
return asList(releaseInfo.getAnidbIndex());
|
||||
public SearchResult[] getAnimeTitles() throws Exception {
|
||||
return releaseInfo.getAnidbIndex();
|
||||
}
|
||||
}
|
||||
|
||||
@ -177,34 +158,15 @@ public final class WebServices {
|
||||
super(name, version);
|
||||
}
|
||||
|
||||
// index of local OpenSubtitles data dump
|
||||
private static LocalSearch<SubtitleSearchResult> localIndex;
|
||||
|
||||
public synchronized LocalSearch<SubtitleSearchResult> getLocalIndex() throws Exception {
|
||||
if (localIndex == null) {
|
||||
// fetch data dump
|
||||
SubtitleSearchResult[] data = releaseInfo.getOpenSubtitlesIndex();
|
||||
|
||||
// index data dump
|
||||
localIndex = new LocalSearch<SubtitleSearchResult>(asList(data)) {
|
||||
|
||||
@Override
|
||||
protected Set<String> getFields(SubtitleSearchResult object) {
|
||||
return set(object.getEffectiveNames());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return localIndex;
|
||||
}
|
||||
// local OpenSubtitles search index
|
||||
private final Resource<LocalSearch<SubtitleSearchResult>> localIndex = Resource.lazy(() -> {
|
||||
return new LocalSearch<SubtitleSearchResult>(releaseInfo.getOpenSubtitlesIndex(), SearchResult::getEffectiveNames);
|
||||
}).memoize();
|
||||
|
||||
@Override
|
||||
public synchronized List<SubtitleSearchResult> search(final String query) throws Exception {
|
||||
List<SubtitleSearchResult> results = getLocalIndex().search(query);
|
||||
|
||||
return sortBySimilarity(results, singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric()));
|
||||
public List<SubtitleSearchResult> search(final String query) throws Exception {
|
||||
return sortBySimilarity(localIndex.get().search(query), singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -27,8 +27,6 @@ import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.logging.Level;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
@ -122,9 +120,7 @@ public class AutoDetection {
|
||||
Map<Group, Set<File>> groups = new TreeMap<Group, Set<File>>();
|
||||
|
||||
// can't use parallel stream because default fork/join pool doesn't play well with the security manager
|
||||
ExecutorService executor = Executors.newWorkStealingPool();
|
||||
|
||||
stream(files).collect(toMap(f -> f, f -> executor.submit(() -> detectGroup(f)))).forEach((file, group) -> {
|
||||
stream(files).collect(toMap(f -> f, f -> workerThreadPool.submit(() -> detectGroup(f)))).forEach((file, group) -> {
|
||||
try {
|
||||
groups.computeIfAbsent(group.get(), k -> new TreeSet<File>()).add(file);
|
||||
} catch (Exception e) {
|
||||
@ -132,7 +128,6 @@ public class AutoDetection {
|
||||
}
|
||||
});
|
||||
|
||||
executor.shutdown();
|
||||
return groups;
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,6 @@ import static net.filebot.WebServices.*;
|
||||
|
||||
import java.awt.Component;
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
@ -16,9 +15,9 @@ import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.logging.Level;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import net.filebot.media.AutoDetection;
|
||||
import net.filebot.media.AutoDetection.Group;
|
||||
@ -38,21 +37,19 @@ class AutoDetectMatcher implements AutoCompleteMatcher {
|
||||
Map<Group, Set<File>> groups = new AutoDetection(files, false, locale).group();
|
||||
|
||||
// can't use parallel stream because default fork/join pool doesn't play well with the security manager
|
||||
ExecutorService executor = Executors.newWorkStealingPool();
|
||||
List<Match<File, ?>> result = new ArrayList<Match<File, ?>>();
|
||||
Map<Group, Future<List<Match<File, ?>>>> matches = groups.entrySet().stream().collect(toMap(Entry::getKey, it -> {
|
||||
return workerThreadPool.submit(() -> match(it.getKey(), it.getValue(), strict, order, locale, autodetection, parent));
|
||||
}));
|
||||
|
||||
groups.entrySet().stream().collect(toMap(Entry::getKey, it -> {
|
||||
return executor.submit(() -> match(it.getKey(), it.getValue(), strict, order, locale, autodetection, parent));
|
||||
})).forEach((group, matches) -> {
|
||||
// collect results
|
||||
return matches.entrySet().stream().flatMap(it -> {
|
||||
try {
|
||||
result.addAll(matches.get());
|
||||
return it.getValue().get().stream();
|
||||
} catch (Exception e) {
|
||||
log.log(Level.WARNING, "Failed to process group: " + group, e);
|
||||
// the message is logged verbatim (no printf-style expansion), so the group key is appended directly
log.log(Level.WARNING, "Failed to process group: " + it.getKey(), e);
|
||||
}
|
||||
});
|
||||
|
||||
executor.shutdown();
|
||||
return result;
|
||||
return Stream.empty();
|
||||
}).collect(toList());
|
||||
}
|
||||
|
||||
private List<Match<File, ?>> match(Group group, Collection<File> files, boolean strict, SortOrder order, Locale locale, boolean autodetection, Component parent) throws Exception {
|
||||
|
@ -21,7 +21,6 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
@ -35,6 +34,7 @@ import org.w3c.dom.Node;
|
||||
|
||||
import net.filebot.Cache;
|
||||
import net.filebot.CacheType;
|
||||
import net.filebot.Resource;
|
||||
import net.filebot.ResourceManager;
|
||||
|
||||
public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
@ -80,16 +80,14 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
return fetchSearchResult(query, locale);
|
||||
}
|
||||
|
||||
// local AniDB search index
|
||||
private final Resource<LocalSearch<SearchResult>> localIndex = Resource.lazy(() -> {
|
||||
return new LocalSearch<SearchResult>(getAnimeTitles(), SearchResult::getEffectiveNames);
|
||||
}).memoize();
|
||||
|
||||
@Override
|
||||
public List<SearchResult> fetchSearchResult(String query, Locale locale) throws Exception {
|
||||
LocalSearch<SearchResult> index = new LocalSearch<SearchResult>(getAnimeTitles()) {
|
||||
|
||||
@Override
|
||||
protected Set<String> getFields(SearchResult it) {
|
||||
return set(it.getEffectiveNames());
|
||||
}
|
||||
};
|
||||
return new ArrayList<SearchResult>(index.search(query));
|
||||
return localIndex.get().search(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -183,7 +181,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
/**
|
||||
* This method is overridden in {@link net.filebot.WebServices.AnidbClientWithLocalSearch} to fetch the Anime Index from our own host and not anidb.net
|
||||
*/
|
||||
public synchronized List<SearchResult> getAnimeTitles() throws Exception {
|
||||
public synchronized SearchResult[] getAnimeTitles() throws Exception {
|
||||
// get data file (unzip and cache)
|
||||
byte[] bytes = getCache("root").bytes("anime-titles.dat.gz", n -> new URL("http://anidb.net/api/" + n)).get();
|
||||
|
||||
@ -230,24 +228,20 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
}
|
||||
|
||||
// build up a list of all possible AniDB search results
|
||||
List<SearchResult> anime = new ArrayList<SearchResult>(entriesByAnime.size());
|
||||
|
||||
entriesByAnime.forEach((aid, triples) -> {
|
||||
List<String> names = triples.stream().sorted((a, b) -> {
|
||||
return entriesByAnime.entrySet().stream().map(it -> {
|
||||
List<String> names = it.getValue().stream().sorted((a, b) -> {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
if (!a[i].equals(b[i])) {
|
||||
return ((Comparable) a[i]).compareTo(b[i]);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}).map(it -> (String) it[2]).collect(toList());
|
||||
}).map(n -> n[2].toString()).collect(toList());
|
||||
|
||||
String primaryTitle = names.get(0);
|
||||
List<String> aliasNames = names.subList(1, names.size());
|
||||
anime.add(new SearchResult(aid, primaryTitle, aliasNames));
|
||||
});
|
||||
|
||||
return anime;
|
||||
return new SearchResult(it.getKey(), primaryTitle, aliasNames);
|
||||
}).toArray(SearchResult[]::new);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,19 +1,19 @@
|
||||
package net.filebot.web;
|
||||
|
||||
import static java.util.Collections.*;
|
||||
import static java.util.Arrays.*;
|
||||
import static java.util.Collections.reverseOrder;
|
||||
import static java.util.Comparator.*;
|
||||
import static java.util.stream.Collectors.*;
|
||||
import static net.filebot.similarity.Normalization.*;
|
||||
|
||||
import java.util.AbstractMap.SimpleImmutableEntry;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
@ -29,20 +29,20 @@ public class LocalSearch<T> {
|
||||
|
||||
private Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
|
||||
|
||||
private List<T> objects;
|
||||
private List<Set<String>> fields;
|
||||
private T[] objects;
|
||||
private Set<String>[] fields;
|
||||
|
||||
public LocalSearch(Collection<? extends T> data) {
|
||||
objects = new ArrayList<T>(data);
|
||||
fields = objects.stream().map(this::getFields).collect(toList());
|
||||
public LocalSearch(T[] data, Function<T, Collection<String>> keywords) {
|
||||
objects = data.clone();
|
||||
fields = stream(objects).map(keywords).map(this::normalize).toArray(Set[]::new);
|
||||
}
|
||||
|
||||
public List<T> search(String q) throws ExecutionException, InterruptedException {
|
||||
String query = normalize(q);
|
||||
|
||||
return IntStream.range(0, objects.size()).mapToObj(i -> {
|
||||
T object = objects.get(i);
|
||||
Set<String> field = fields.get(i);
|
||||
return IntStream.range(0, objects.length).mapToObj(i -> {
|
||||
T object = objects[i];
|
||||
Set<String> field = fields[i];
|
||||
|
||||
boolean match = field.stream().anyMatch(it -> it.contains(query));
|
||||
double similarity = field.stream().mapToDouble(it -> metric.getSimilarity(query, it)).max().orElse(0);
|
||||
@ -59,22 +59,12 @@ public class LocalSearch<T> {
|
||||
this.resultSetSize = resultSetSize;
|
||||
}
|
||||
|
||||
protected Set<String> getFields(T object) {
|
||||
return set(singleton(object.toString()));
|
||||
}
|
||||
|
||||
protected Set<String> set(Collection<String> values) {
|
||||
Set<String> set = new HashSet<String>(values.size());
|
||||
for (String value : values) {
|
||||
if (value != null) {
|
||||
set.add(normalize(value));
|
||||
}
|
||||
}
|
||||
return set;
|
||||
protected Set<String> normalize(Collection<String> values) {
|
||||
return values.stream().map(this::normalize).collect(toSet());
|
||||
}
|
||||
|
||||
protected String normalize(String value) {
|
||||
// normalize separator, normalize case and trim
|
||||
// normalize separator, trim and normalize case
|
||||
return normalizePunctuation(transliterator.transform(value)).toLowerCase();
|
||||
}
|
||||
|
||||
|
@ -28,8 +28,8 @@ public class AnidbClientTest {
|
||||
|
||||
@Test
|
||||
public void getAnimeTitles() throws Exception {
|
||||
List<SearchResult> animeTitles = anidb.getAnimeTitles();
|
||||
assertTrue(animeTitles.size() > 8000);
|
||||
SearchResult[] animeTitles = anidb.getAnimeTitles();
|
||||
assertTrue(animeTitles.length > 8000);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
Reference in New Issue
Block a user