filebot/source/net/filebot/similarity/NumericSimilarityMetric.java

36 lines
1.0 KiB
Java

package net.filebot.similarity;
import static java.util.stream.Collectors.*;
import static net.filebot.util.StringUtilities.*;
import static org.simmetrics.builders.StringMetricBuilder.*;
import java.util.List;
import org.simmetrics.StringMetric;
import org.simmetrics.metrics.BlockDistance;
import org.simmetrics.tokenizers.AbstractTokenizer;
/**
 * Similarity metric that compares two objects purely by the integers that
 * appear in their string representations.
 */
public class NumericSimilarityMetric implements SimilarityMetric {

	// block distance evaluated over the number tokens produced by IntegerTokenizer
	private final StringMetric metric = with(new BlockDistance<String>())
			.tokenize(new IntegerTokenizer())
			.build();

	/**
	 * Compares the normalized string forms of both arguments with the
	 * number-token based metric.
	 *
	 * @param o1 first object to compare
	 * @param o2 second object to compare
	 * @return the value reported by the underlying string metric
	 */
	@Override
	public float getSimilarity(Object o1, Object o2) {
		String first = normalize(o1);
		String second = normalize(o2);
		return metric.compare(first, second);
	}

	/**
	 * Converts the given object into the string that is handed to the metric.
	 *
	 * @param object value to convert
	 * @return the result of {@code object.toString()}
	 */
	protected String normalize(Object object) {
		// only number patterns matter to this metric, so the plain string form is sufficient
		return object.toString();
	}

	/**
	 * Tokenizer that turns an input string into the list of integers it
	 * contains, each rendered as a string token.
	 */
	private static class IntegerTokenizer extends AbstractTokenizer {

		/**
		 * @param input text to scan for integers
		 * @return one string token per integer reported by {@code matchIntegers}, in the same order
		 */
		@Override
		public List<String> tokenizeToList(String input) {
			List<String> tokens = new java.util.ArrayList<String>();
			for (Object number : matchIntegers(input)) {
				tokens.add(String.valueOf(number));
			}
			return tokens;
		}
	}
}