2014-04-19 02:30:29 -04:00
|
|
|
package net.filebot.similarity;
|
2008-06-29 13:38:57 -04:00
|
|
|
|
2018-07-03 02:39:18 -04:00
|
|
|
import static java.util.stream.Collectors.*;
|
|
|
|
import static net.filebot.util.StringUtilities.*;
|
2019-01-30 11:30:59 -05:00
|
|
|
import static org.simmetrics.builders.StringMetricBuilder.*;
|
2016-01-10 00:03:09 -05:00
|
|
|
|
2019-01-30 11:30:59 -05:00
|
|
|
import java.util.List;
|
2008-06-29 13:38:57 -04:00
|
|
|
|
2019-01-30 11:30:59 -05:00
|
|
|
import org.simmetrics.StringMetric;
|
|
|
|
import org.simmetrics.metrics.BlockDistance;
|
|
|
|
import org.simmetrics.tokenizers.AbstractTokenizer;
|
2008-06-29 13:38:57 -04:00
|
|
|
|
2009-01-11 16:23:03 -05:00
|
|
|
public class NumericSimilarityMetric implements SimilarityMetric {
|
2015-07-25 18:47:19 -04:00
|
|
|
|
2019-01-30 11:30:59 -05:00
|
|
|
private final StringMetric metric = with(new BlockDistance<String>()).tokenize(new NumberTokeniser()).build();
|
2015-07-25 18:47:19 -04:00
|
|
|
|
2008-06-29 13:38:57 -04:00
|
|
|
@Override
|
2009-01-11 16:23:03 -05:00
|
|
|
public float getSimilarity(Object o1, Object o2) {
|
2019-01-30 11:30:59 -05:00
|
|
|
return metric.compare(normalize(o1), normalize(o2));
|
2009-01-11 16:23:03 -05:00
|
|
|
}
|
2015-07-25 18:47:19 -04:00
|
|
|
|
2009-01-11 16:23:03 -05:00
|
|
|
protected String normalize(Object object) {
|
2009-07-26 12:54:24 -04:00
|
|
|
// no need to do anything special here, because we don't care about anything but number patterns anyway
|
|
|
|
return object.toString();
|
2008-06-29 13:38:57 -04:00
|
|
|
}
|
2015-07-25 18:47:19 -04:00
|
|
|
|
2019-01-30 11:30:59 -05:00
|
|
|
private static class NumberTokeniser extends AbstractTokenizer {
|
2015-07-25 18:47:19 -04:00
|
|
|
|
2008-06-29 13:38:57 -04:00
|
|
|
@Override
|
2019-01-30 11:30:59 -05:00
|
|
|
public List<String> tokenizeToList(String input) {
|
|
|
|
return matchIntegers(input).stream().map(String::valueOf).collect(toList());
|
2008-06-29 13:38:57 -04:00
|
|
|
}
|
|
|
|
}
|
2015-07-25 18:47:19 -04:00
|
|
|
|
2008-06-29 13:38:57 -04:00
|
|
|
}
|