mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-14 07:18:03 -05:00
72 lines
2.4 KiB
Java
72 lines
2.4 KiB
Java
package net.filebot.web;
|
|
|
|
import static java.util.Arrays.*;
|
|
import static java.util.Collections.reverseOrder;
|
|
import static java.util.Comparator.*;
|
|
import static java.util.stream.Collectors.*;
|
|
import static net.filebot.similarity.Normalization.*;
|
|
|
|
import java.util.AbstractMap.SimpleImmutableEntry;
|
|
import java.util.Collection;
|
|
import java.util.List;
|
|
import java.util.Map.Entry;
|
|
import java.util.Objects;
|
|
import java.util.Set;
|
|
import java.util.concurrent.ExecutionException;
|
|
import java.util.function.Function;
|
|
import java.util.stream.IntStream;
|
|
|
|
import com.ibm.icu.text.Transliterator;
|
|
|
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
|
|
|
public class LocalSearch<T> {
|
|
|
|
private AbstractStringMetric metric = new QGramsDistance();
|
|
private float resultMinimumSimilarity = 0.5f;
|
|
private int resultSetSize = 20;
|
|
|
|
private Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
|
|
|
|
private T[] objects;
|
|
private Set<String>[] fields;
|
|
|
|
public LocalSearch(T[] data, Function<T, Collection<String>> keywords) {
|
|
objects = data.clone();
|
|
fields = stream(objects).map(keywords).map(this::normalize).toArray(Set[]::new);
|
|
}
|
|
|
|
public List<T> search(String q) throws ExecutionException, InterruptedException {
|
|
String query = normalize(q);
|
|
|
|
return IntStream.range(0, objects.length).mapToObj(i -> {
|
|
T object = objects[i];
|
|
Set<String> field = fields[i];
|
|
|
|
boolean match = field.stream().anyMatch(it -> it.contains(query));
|
|
double similarity = field.stream().mapToDouble(it -> metric.getSimilarity(query, it)).max().orElse(0);
|
|
|
|
return match || similarity > resultMinimumSimilarity ? new SimpleImmutableEntry<T, Double>(object, similarity) : null;
|
|
}).filter(Objects::nonNull).sorted(reverseOrder(comparing(Entry::getValue))).limit(resultSetSize).map(Entry::getKey).collect(toList());
|
|
}
|
|
|
|
public void setResultMinimumSimilarity(float resultMinimumSimilarity) {
|
|
this.resultMinimumSimilarity = resultMinimumSimilarity;
|
|
}
|
|
|
|
public void setResultSetSize(int resultSetSize) {
|
|
this.resultSetSize = resultSetSize;
|
|
}
|
|
|
|
protected Set<String> normalize(Collection<String> values) {
|
|
return values.stream().map(this::normalize).collect(toSet());
|
|
}
|
|
|
|
protected String normalize(String value) {
|
|
// normalize separator, trim and normalize case
|
|
return normalizePunctuation(transliterator.transform(value)).toLowerCase();
|
|
}
|
|
|
|
}
|