2008-06-29 13:38:57 -04:00
|
|
|
|
|
|
|
package net.sourceforge.filebot.ui.panel.rename.metric;
|
|
|
|
|
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.HashSet;
|
|
|
|
import java.util.Scanner;
|
|
|
|
import java.util.Set;
|
|
|
|
|
|
|
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
|
|
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.EuclideanDistance;
|
|
|
|
import uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser;
|
|
|
|
import uk.ac.shef.wit.simmetrics.wordhandlers.DummyStopTermHandler;
|
|
|
|
import uk.ac.shef.wit.simmetrics.wordhandlers.InterfaceTermHandler;
|
|
|
|
|
|
|
|
|
|
|
|
public class NumericSimilarityMetric extends AbstractNameSimilarityMetric {
|
|
|
|
|
|
|
|
private final AbstractStringMetric metric;
|
|
|
|
|
|
|
|
|
|
|
|
public NumericSimilarityMetric() {
|
|
|
|
// I have absolutely no clue as to why, but I get a good matching behavior
|
2008-11-19 11:28:59 -05:00
|
|
|
// when using a numeric tokensier with EuclideanDistance
|
2008-06-29 13:38:57 -04:00
|
|
|
metric = new EuclideanDistance(new NumberTokeniser());
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public float getSimilarity(String a, String b) {
|
|
|
|
return metric.getSimilarity(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getDescription() {
|
|
|
|
return "Similarity of number patterns";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getName() {
|
|
|
|
return "Numbers";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
private static class NumberTokeniser implements InterfaceTokeniser {
|
|
|
|
|
2008-07-30 18:37:01 -04:00
|
|
|
private static final String delimiter = "(\\D)+";
|
2008-06-29 13:38:57 -04:00
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public ArrayList<String> tokenizeToArrayList(String input) {
|
|
|
|
ArrayList<String> tokens = new ArrayList<String>();
|
|
|
|
|
|
|
|
Scanner scanner = new Scanner(input);
|
|
|
|
scanner.useDelimiter(delimiter);
|
|
|
|
|
|
|
|
while (scanner.hasNextInt()) {
|
|
|
|
tokens.add(Integer.toString(scanner.nextInt()));
|
|
|
|
}
|
|
|
|
|
|
|
|
return tokens;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Set<String> tokenizeToSet(String input) {
|
|
|
|
return new HashSet<String>(tokenizeToArrayList(input));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getShortDescriptionString() {
|
|
|
|
return getClass().getSimpleName();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getDelimiters() {
|
|
|
|
return delimiter;
|
|
|
|
}
|
|
|
|
|
|
|
|
private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public InterfaceTermHandler getStopWordHandler() {
|
|
|
|
return stopWordHandler;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void setStopWordHandler(InterfaceTermHandler stopWordHandler) {
|
|
|
|
this.stopWordHandler = stopWordHandler;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|