2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
package net.sourceforge.filebot.similarity;
|
|
|
|
|
|
|
|
|
2009-08-12 15:35:24 -04:00
|
|
|
import static java.util.Collections.*;
|
2012-01-01 22:48:24 -05:00
|
|
|
import static java.util.regex.Pattern.*;
|
|
|
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
2009-07-03 08:58:05 -04:00
|
|
|
import static net.sourceforge.tuned.StringUtilities.*;
|
|
|
|
|
2009-02-02 15:50:04 -05:00
|
|
|
import java.io.File;
|
2009-01-24 19:08:57 -05:00
|
|
|
import java.util.AbstractCollection;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.Collection;
|
|
|
|
import java.util.Comparator;
|
|
|
|
import java.util.Iterator;
|
|
|
|
import java.util.LinkedHashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
2011-12-03 05:50:45 -05:00
|
|
|
import java.util.Map.Entry;
|
2009-01-24 19:08:57 -05:00
|
|
|
import java.util.Scanner;
|
|
|
|
import java.util.TreeMap;
|
2009-08-12 15:35:24 -04:00
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
2009-02-02 15:50:04 -05:00
|
|
|
|
2011-11-22 09:44:54 -05:00
|
|
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SeasonEpisodeFilter;
|
2010-11-01 05:56:20 -04:00
|
|
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
2009-03-12 16:08:42 -04:00
|
|
|
import net.sourceforge.tuned.FileUtilities;
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
|
|
|
|
public class SeriesNameMatcher {
|
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
/** Locates season/episode patterns inside episode names. */
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);

/** Locates date patterns inside episode names; used as a fallback when no season/episode pattern is found. */
protected DateMatcher dateMatcher = new DateMatcher();

/** Rates how similar two names are. */
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();

/** Highest word index at which a common word sequence is allowed to start. */
protected int commonWordSequenceMaxStartIndex;

/** Decides whether two words are considered equal (compare result of 0). */
protected Comparator<String> commonWordComparator;


/**
 * Create a matcher that compares words case-insensitively and accepts common word
 * sequences starting at word index 3 or lower.
 */
public SeriesNameMatcher() {
	this(String.CASE_INSENSITIVE_ORDER);
}


/**
 * Create a matcher that accepts common word sequences starting at word index 3 or lower.
 * 
 * @param comparator comparator used to decide whether two words are equal
 */
public SeriesNameMatcher(Comparator<String> comparator) {
	this(comparator, 3);
}


/**
 * Create a fully configured matcher.
 * 
 * @param commonWordComparator comparator used to decide whether two words are equal
 * @param commonWordSequenceMaxStartIndex highest word index at which a common word sequence
 *            may start
 */
public SeriesNameMatcher(Comparator<String> commonWordComparator, int commonWordSequenceMaxStartIndex) {
	this.commonWordComparator = commonWordComparator;
	this.commonWordSequenceMaxStartIndex = commonWordSequenceMaxStartIndex;
}
|
2009-02-02 15:50:04 -05:00
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-02-03 15:36:57 -05:00
|
|
|
public Collection<String> matchAll(File[] files) {
|
2009-01-24 19:08:57 -05:00
|
|
|
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
|
|
|
|
2009-02-02 15:50:04 -05:00
|
|
|
// group files by parent folder
|
|
|
|
for (Entry<File, String[]> entry : mapNamesByFolder(files).entrySet()) {
|
|
|
|
String parent = entry.getKey().getName();
|
|
|
|
String[] names = entry.getValue();
|
|
|
|
|
|
|
|
for (String nameMatch : matchAll(names)) {
|
|
|
|
String commonMatch = matchByFirstCommonWordSequence(nameMatch, parent);
|
2009-08-12 15:35:24 -04:00
|
|
|
float similarity = commonMatch == null ? 0 : nameSimilarityMetric.getSimilarity(commonMatch, nameMatch);
|
2009-02-02 15:50:04 -05:00
|
|
|
|
2009-08-12 15:35:24 -04:00
|
|
|
// prefer common match, but only if it's very similar to the original match
|
|
|
|
seriesNames.add(similarity > 0.7 ? commonMatch : nameMatch);
|
2009-02-02 15:50:04 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return seriesNames;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-02-03 15:36:57 -05:00
|
|
|
public Collection<String> matchAll(String[] names) {
|
2009-02-02 15:50:04 -05:00
|
|
|
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
|
|
|
|
2009-02-03 15:36:57 -05:00
|
|
|
// allow matching of a small number of episodes, by setting threshold = length if length < 5
|
|
|
|
int threshold = Math.min(names.length, 5);
|
|
|
|
|
2009-08-12 15:35:24 -04:00
|
|
|
// match common word sequences (likely series names)
|
|
|
|
SeriesNameCollection whitelist = new SeriesNameCollection();
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2012-02-09 08:50:14 -05:00
|
|
|
// focus chars before the SxE / Date pattern when matching by common word sequence
|
2011-12-28 19:41:27 -05:00
|
|
|
String[] focus = Arrays.copyOf(names, names.length);
|
|
|
|
for (int i = 0; i < focus.length; i++) {
|
2012-02-09 08:50:14 -05:00
|
|
|
int sxePos = seasonEpisodeMatcher.find(focus[i], 0);
|
|
|
|
if (sxePos >= 0) {
|
|
|
|
focus[i] = focus[i].substring(0, sxePos);
|
|
|
|
} else {
|
|
|
|
int datePos = dateMatcher.find(focus[i], 0);
|
|
|
|
if (datePos >= 0) {
|
|
|
|
focus[i] = focus[i].substring(0, datePos);
|
|
|
|
}
|
2011-12-28 19:41:27 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
whitelist.addAll(deepMatchAll(focus, threshold));
|
2009-08-12 15:35:24 -04:00
|
|
|
|
|
|
|
// 1. use pattern matching
|
2012-01-01 22:48:24 -05:00
|
|
|
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ), threshold, false));
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2009-08-12 15:35:24 -04:00
|
|
|
// 2. use common word sequences
|
|
|
|
seriesNames.addAll(whitelist);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
return seriesNames;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
/**
|
|
|
|
* Try to match and verify all series names using known season episode patterns.
|
|
|
|
*
|
2009-02-02 15:50:04 -05:00
|
|
|
* @param names episode names
|
|
|
|
* @return series names that have been matched one or multiple times depending on the
|
|
|
|
* threshold
|
2009-01-24 19:08:57 -05:00
|
|
|
*/
|
2010-11-01 05:56:20 -04:00
|
|
|
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
|
2012-01-01 22:48:24 -05:00
|
|
|
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, commonWordComparator);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
for (String name : names) {
|
2009-08-12 15:35:24 -04:00
|
|
|
// use normalized name
|
|
|
|
name = normalize(name);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2010-11-01 05:56:20 -04:00
|
|
|
Matcher prefix = prefixPattern.matcher(name);
|
2012-02-09 08:50:14 -05:00
|
|
|
int prefixEnd = prefix.find() ? prefix.end() : 0;
|
2009-08-12 15:35:24 -04:00
|
|
|
|
2012-02-09 08:50:14 -05:00
|
|
|
int sxePosition = seasonEpisodeMatcher.find(name, prefixEnd);
|
2010-11-01 05:56:20 -04:00
|
|
|
if (sxePosition > 0) {
|
|
|
|
String hit = name.substring(0, sxePosition).trim();
|
|
|
|
List<SxE> sxe = seasonEpisodeMatcher.match(name.substring(sxePosition));
|
|
|
|
|
|
|
|
if (!strict && sxe.size() == 1 && sxe.get(0).season >= 0) {
|
|
|
|
// bypass threshold if hit is likely to be genuine
|
|
|
|
thresholdCollection.addDirect(hit);
|
|
|
|
} else {
|
|
|
|
// require multiple matches, if hit might be a false match
|
|
|
|
thresholdCollection.add(hit);
|
|
|
|
}
|
2012-02-09 08:50:14 -05:00
|
|
|
} else {
|
|
|
|
// try date pattern as fallback
|
|
|
|
int datePosition = dateMatcher.find(name, prefixEnd);
|
|
|
|
if (datePosition > 0) {
|
|
|
|
thresholdCollection.addDirect(name.substring(0, datePosition).trim());
|
|
|
|
}
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
2012-02-09 08:50:14 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2010-11-01 05:56:20 -04:00
|
|
|
return thresholdCollection;
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
/**
|
|
|
|
* Try to match all common word sequences in the given list.
|
|
|
|
*
|
|
|
|
* @param names list of episode names
|
|
|
|
* @return all common word sequences that have been found
|
|
|
|
*/
|
2009-02-03 15:36:57 -05:00
|
|
|
private Collection<String> deepMatchAll(String[] names, int threshold) {
|
2009-02-02 15:50:04 -05:00
|
|
|
// can't use common word sequence matching for less than 2 names
|
|
|
|
if (names.length < 2 || names.length < threshold) {
|
2009-08-12 15:35:24 -04:00
|
|
|
return emptySet();
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
String common = matchByFirstCommonWordSequence(names);
|
|
|
|
|
|
|
|
if (common != null) {
|
|
|
|
// common word sequence found
|
2009-08-12 15:35:24 -04:00
|
|
|
return singleton(common);
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// recursive divide and conquer
|
|
|
|
List<String> results = new ArrayList<String>();
|
|
|
|
|
2009-02-02 15:50:04 -05:00
|
|
|
// split list in two and try to match common word sequence on those
|
2009-02-03 15:36:57 -05:00
|
|
|
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2), threshold));
|
|
|
|
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length), threshold));
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
return results;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
/**
|
|
|
|
* Try to match a series name from the given episode name using known season episode
|
|
|
|
* patterns.
|
|
|
|
*
|
|
|
|
* @param name episode name
|
|
|
|
* @return a substring of the given name that ends before the first occurrence of a season
|
2009-02-02 15:50:04 -05:00
|
|
|
* episode pattern, or null if there is no such pattern
|
2009-01-24 19:08:57 -05:00
|
|
|
*/
|
2012-02-09 08:50:14 -05:00
|
|
|
public String matchByEpisodeIdentifier(String name) {
|
2009-07-23 10:25:43 -04:00
|
|
|
int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
|
2009-01-24 19:08:57 -05:00
|
|
|
if (seasonEpisodePosition > 0) {
|
|
|
|
// series name ends at the first season episode pattern
|
|
|
|
return normalize(name.substring(0, seasonEpisodePosition));
|
|
|
|
}
|
|
|
|
|
2012-02-09 08:50:14 -05:00
|
|
|
int datePosition = dateMatcher.find(name, 0);
|
|
|
|
if (datePosition > 0) {
|
|
|
|
// series name ends at the first season episode pattern
|
|
|
|
return normalize(name.substring(0, datePosition));
|
|
|
|
}
|
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
/**
|
|
|
|
* Try to match a series name from the first common word sequence.
|
|
|
|
*
|
2009-02-03 15:36:57 -05:00
|
|
|
* @param names various episode names (at least two)
|
2009-01-24 19:08:57 -05:00
|
|
|
* @return a word sequence all episode names have in common, or null
|
2009-02-03 15:36:57 -05:00
|
|
|
* @throws IllegalArgumentException if less than 2 episode names are given
|
2009-01-24 19:08:57 -05:00
|
|
|
*/
|
2009-02-02 15:50:04 -05:00
|
|
|
public String matchByFirstCommonWordSequence(String... names) {
|
|
|
|
if (names.length < 2) {
|
|
|
|
throw new IllegalArgumentException("Can't match common sequence from less than two names");
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
String[] common = null;
|
|
|
|
|
|
|
|
for (String name : names) {
|
|
|
|
String[] words = normalize(name).split("\\s+");
|
|
|
|
|
|
|
|
if (common == null) {
|
|
|
|
// initialize common with current word array
|
|
|
|
common = words;
|
|
|
|
} else {
|
|
|
|
// find common sequence
|
2012-01-01 22:48:24 -05:00
|
|
|
common = firstCommonSequence(common, words, commonWordSequenceMaxStartIndex, commonWordComparator);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
if (common == null) {
|
|
|
|
// no common sequence
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-02-02 15:50:04 -05:00
|
|
|
if (common == null)
|
|
|
|
return null;
|
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
return join(common, " ");
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
protected String normalize(String name) {
|
2009-08-12 15:35:24 -04:00
|
|
|
// remove group names and checksums, any [...] or (...)
|
2012-01-01 22:48:24 -05:00
|
|
|
name = normalizeBrackets(name);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2011-11-07 11:25:38 -05:00
|
|
|
// remove/normalize special characters
|
2012-01-01 22:48:24 -05:00
|
|
|
name = normalizePunctuation(name);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
return name;
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-08-12 15:35:24 -04:00
|
|
|
protected <T> T[] firstCommonSequence(T[] seq1, T[] seq2, int maxStartIndex, Comparator<T> equalsComparator) {
|
|
|
|
for (int i = 0; i < seq1.length && i <= maxStartIndex; i++) {
|
|
|
|
for (int j = 0; j < seq2.length && j <= maxStartIndex; j++) {
|
2009-01-24 19:08:57 -05:00
|
|
|
// common sequence length
|
|
|
|
int len = 0;
|
|
|
|
|
|
|
|
// iterate over common sequence
|
|
|
|
while ((i + len < seq1.length) && (j + len < seq2.length) && (equalsComparator.compare(seq1[i + len], seq2[j + len]) == 0)) {
|
|
|
|
len++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// check if a common sequence was found
|
|
|
|
if (len > 0) {
|
|
|
|
if (i == 0 && len == seq1.length)
|
|
|
|
return seq1;
|
|
|
|
|
|
|
|
return Arrays.copyOfRange(seq1, i, i + len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// no intersection at all
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-02-02 15:50:04 -05:00
|
|
|
private Map<File, String[]> mapNamesByFolder(File... files) {
|
|
|
|
Map<File, List<File>> filesByFolder = new LinkedHashMap<File, List<File>>();
|
|
|
|
|
|
|
|
for (File file : files) {
|
|
|
|
File folder = file.getParentFile();
|
|
|
|
|
|
|
|
List<File> list = filesByFolder.get(folder);
|
|
|
|
|
|
|
|
if (list == null) {
|
|
|
|
list = new ArrayList<File>();
|
|
|
|
filesByFolder.put(folder, list);
|
|
|
|
}
|
|
|
|
|
|
|
|
list.add(file);
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert folder->files map to folder->names map
|
|
|
|
Map<File, String[]> namesByFolder = new LinkedHashMap<File, String[]>();
|
|
|
|
|
|
|
|
for (Entry<File, List<File>> entry : filesByFolder.entrySet()) {
|
2009-03-12 16:08:42 -04:00
|
|
|
namesByFolder.put(entry.getKey(), names(entry.getValue()));
|
2009-02-02 15:50:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return namesByFolder;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-05-25 16:13:30 -04:00
|
|
|
protected String[] names(Collection<File> files) {
|
2009-03-12 16:08:42 -04:00
|
|
|
String[] names = new String[files.size()];
|
|
|
|
|
2009-05-25 16:13:30 -04:00
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
// fill array
|
|
|
|
for (File file : files) {
|
|
|
|
names[i++] = FileUtilities.getName(file);
|
2009-03-12 16:08:42 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return names;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
protected static class SeriesNameCollection extends AbstractCollection<String> {
|
|
|
|
|
|
|
|
private final Map<String, String> data = new LinkedHashMap<String, String>();
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
|
|
|
public boolean add(String value) {
|
2011-11-28 05:24:46 -05:00
|
|
|
value = value.trim();
|
|
|
|
|
|
|
|
// require series name to have at least two characters
|
|
|
|
if (value.length() < 2) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-08-12 15:35:24 -04:00
|
|
|
String current = data.get(key(value));
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2011-11-07 11:27:52 -05:00
|
|
|
// prefer strings with similar upper/lower case ratio (e.g. prefer Roswell over roswell)
|
2009-01-24 19:08:57 -05:00
|
|
|
if (current == null || firstCharacterCaseBalance(current) < firstCharacterCaseBalance(value)) {
|
2009-08-12 15:35:24 -04:00
|
|
|
data.put(key(value), value);
|
2009-01-24 19:08:57 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-08-12 15:35:24 -04:00
|
|
|
protected String key(Object value) {
|
|
|
|
return value.toString().toLowerCase();
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
protected float firstCharacterCaseBalance(String s) {
|
|
|
|
int upper = 0;
|
|
|
|
int lower = 0;
|
|
|
|
|
2009-02-03 15:36:57 -05:00
|
|
|
Scanner scanner = new Scanner(s); // Scanner uses a white space delimiter by default
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
while (scanner.hasNext()) {
|
|
|
|
char c = scanner.next().charAt(0);
|
|
|
|
|
|
|
|
if (Character.isLowerCase(c))
|
|
|
|
lower++;
|
|
|
|
else if (Character.isUpperCase(c))
|
|
|
|
upper++;
|
|
|
|
}
|
|
|
|
|
2009-02-03 15:36:57 -05:00
|
|
|
// give upper case characters a slight boost over lower case characters
|
2009-01-24 19:08:57 -05:00
|
|
|
return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
2009-08-12 15:35:24 -04:00
|
|
|
public boolean contains(Object value) {
|
|
|
|
return data.containsKey(key(value));
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
|
|
|
public Iterator<String> iterator() {
|
|
|
|
return data.values().iterator();
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
|
|
|
public int size() {
|
|
|
|
return data.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
protected static class ThresholdCollection<E> extends AbstractCollection<E> {
|
|
|
|
|
|
|
|
private final Collection<E> heaven;
|
|
|
|
private final Map<E, Collection<E>> limbo;
|
|
|
|
|
|
|
|
private final int threshold;
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
public ThresholdCollection(int threshold, Comparator<E> equalityComparator) {
|
|
|
|
this.heaven = new ArrayList<E>();
|
|
|
|
this.limbo = new TreeMap<E, Collection<E>>(equalityComparator);
|
|
|
|
this.threshold = threshold;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
2009-02-02 15:50:04 -05:00
|
|
|
public boolean add(E value) {
|
|
|
|
Collection<E> buffer = limbo.get(value);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
if (buffer == null) {
|
|
|
|
// initialize buffer
|
|
|
|
buffer = new ArrayList<E>(threshold);
|
2009-02-02 15:50:04 -05:00
|
|
|
limbo.put(value, buffer);
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if (buffer == heaven) {
|
|
|
|
// threshold reached
|
2009-02-02 15:50:04 -05:00
|
|
|
heaven.add(value);
|
2009-01-24 19:08:57 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// add element to buffer
|
2009-02-02 15:50:04 -05:00
|
|
|
buffer.add(value);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
// check if threshold has been reached
|
|
|
|
if (buffer.size() >= threshold) {
|
|
|
|
heaven.addAll(buffer);
|
|
|
|
|
|
|
|
// replace buffer with heaven
|
2009-02-02 15:50:04 -05:00
|
|
|
limbo.put(value, heaven);
|
2009-01-24 19:08:57 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2010-11-01 05:56:20 -04:00
|
|
|
public boolean addDirect(E element) {
|
|
|
|
return heaven.add(element);
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
|
|
|
public Iterator<E> iterator() {
|
|
|
|
return heaven.iterator();
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
|
|
|
public int size() {
|
|
|
|
return heaven.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|