filebot/source/net/filebot/similarity/NumericSimilarityMetric.java

36 lines
1.0 KiB
Java
Raw Normal View History

2014-04-19 02:30:29 -04:00
package net.filebot.similarity;
import static java.util.stream.Collectors.*;
import static net.filebot.util.StringUtilities.*;
2019-01-30 11:30:59 -05:00
import static org.simmetrics.builders.StringMetricBuilder.*;
2016-01-10 00:03:09 -05:00
2019-01-30 11:30:59 -05:00
import java.util.List;
2019-01-30 11:30:59 -05:00
import org.simmetrics.StringMetric;
import org.simmetrics.metrics.BlockDistance;
import org.simmetrics.tokenizers.AbstractTokenizer;
public class NumericSimilarityMetric implements SimilarityMetric {
2015-07-25 18:47:19 -04:00
2019-01-30 11:30:59 -05:00
private final StringMetric metric = with(new BlockDistance<String>()).tokenize(new NumberTokeniser()).build();
2015-07-25 18:47:19 -04:00
@Override
public float getSimilarity(Object o1, Object o2) {
2019-01-30 11:30:59 -05:00
return metric.compare(normalize(o1), normalize(o2));
}
2015-07-25 18:47:19 -04:00
protected String normalize(Object object) {
// no need to do anything special here, because we don't care about anything but number patterns anyway
return object.toString();
}
2015-07-25 18:47:19 -04:00
2019-01-30 11:30:59 -05:00
private static class NumberTokeniser extends AbstractTokenizer {
2015-07-25 18:47:19 -04:00
@Override
2019-01-30 11:30:59 -05:00
public List<String> tokenizeToList(String input) {
return matchIntegers(input).stream().map(String::valueOf).collect(toList());
}
}
2015-07-25 18:47:19 -04:00
}