From 6d8c82df9082922ed2dfc9a3acde44646c6e5378 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Fri, 24 Jul 2009 20:38:47 +0000 Subject: [PATCH] * much faster matching for lots of files/episodes --- .../sourceforge/filebot/similarity/Match.java | 12 ---- .../filebot/similarity/Matcher.java | 69 ++++++++++++------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/source/net/sourceforge/filebot/similarity/Match.java b/source/net/sourceforge/filebot/similarity/Match.java index 3c984955..2f0ccd64 100644 --- a/source/net/sourceforge/filebot/similarity/Match.java +++ b/source/net/sourceforge/filebot/similarity/Match.java @@ -27,18 +27,6 @@ public class Match { } - /** - * Check if the given match has the same value or the same candidate. This method uses an - * identity equality test. - * - * @param match a match - * @return Returns true if the specified match has no value common. - */ - public boolean disjoint(Match match) { - return (value != match.value && candidate != match.candidate); - } - - @Override public boolean equals(Object obj) { if (obj instanceof Match) { diff --git a/source/net/sourceforge/filebot/similarity/Matcher.java b/source/net/sourceforge/filebot/similarity/Matcher.java index 23afa90c..a56ddd54 100644 --- a/source/net/sourceforge/filebot/similarity/Matcher.java +++ b/source/net/sourceforge/filebot/similarity/Matcher.java @@ -6,11 +6,14 @@ import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -24,7 +27,7 @@ public class Matcher { private final DisjointMatchCollection disjointMatchCollection; - + public Matcher(Collection values, Collection candidates, Collection metrics) { this.values = new LinkedList(values); this.candidates = new LinkedList(candidates); @@ -36,7 +39,6 @@ public class Matcher { public synchronized List> match() throws InterruptedException { - // list of all combinations of values and candidates List> possibleMatches = new ArrayList>(values.size() * candidates.size()); @@ -91,7 +93,7 @@ public class Matcher { return; } - for (List> matchesWithEqualSimilarity : mapBySimilarity(possibleMatches, metrics[level]).values()) { + for (Set> matchesWithEqualSimilarity : mapBySimilarity(possibleMatches, metrics[level]).values()) { // some matches may already be unique List> disjointMatches = disjointMatches(matchesWithEqualSimilarity); @@ -120,22 +122,22 @@ public class Matcher { } - protected SortedMap>> mapBySimilarity(Collection> possibleMatches, SimilarityMetric metric) throws InterruptedException { + protected SortedMap>> mapBySimilarity(Collection> possibleMatches, SimilarityMetric metric) throws InterruptedException { // map sorted by similarity descending - SortedMap>> similarityMap = new TreeMap>>(Collections.reverseOrder()); + SortedMap>> similarityMap = new TreeMap>>(Collections.reverseOrder()); // use metric on all matches for (Match possibleMatch : possibleMatches) { float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate()); - List> list = similarityMap.get(similarity); + Set> matchSet = similarityMap.get(similarity); - if (list == null) { - list = new ArrayList>(); - similarityMap.put(similarity, list); + if (matchSet == null) { + matchSet = new LinkedHashSet>(); + similarityMap.put(similarity, matchSet); } - list.add(possibleMatch); + matchSet.add(possibleMatch); // unwind this thread if we have been interrupted if (Thread.interrupted()) { @@ -148,28 +150,49 @@ public class Matcher { protected List> disjointMatches(Collection> collection) { - List> disjointMatches = new ArrayList>(); + Map>> matchesByValue = new HashMap>>(); + Map>> matchesByCandidate = new HashMap>>(); - for (Match m1 : collection) { - boolean disjoint = true; + // map matches by value and candidate respectively + for (Match match : collection) { + List> matchListForValue = matchesByValue.get(match.getValue()); + List> matchListForCandidate = matchesByCandidate.get(match.getCandidate()); - for (Match m2 : collection) { - // ignore same element - if (m1 != m2 && !m1.disjoint(m2)) { - disjoint = false; - break; - } + // create list if necessary + if (matchListForValue == null) { + matchListForValue = new ArrayList>(); + matchesByValue.put(match.getValue(), matchListForValue); } - if (disjoint) { - disjointMatches.add(m1); + // create list if necessary + if (matchListForCandidate == null) { + matchListForCandidate = new ArrayList>(); + matchesByCandidate.put(match.getCandidate(), matchListForCandidate); + } + + // add match to both lists + matchListForValue.add(match); + matchListForCandidate.add(match); + } + + // collect disjoint matches + List> disjointMatches = new ArrayList>(); + + for (Match match : collection) { + List> matchListForValue = matchesByValue.get(match.getValue()); + List> matchListForCandidate = matchesByCandidate.get(match.getCandidate()); + + // check if match is the only element in both lists + if (matchListForValue.size() == 1 && matchListForValue.equals(matchListForCandidate)) { + // match is disjoint :) + disjointMatches.add(matchListForValue.get(0)); } } return disjointMatches; } - + protected static class DisjointMatchCollection extends AbstractList> { private final List> matches = new ArrayList>(); @@ -177,7 +200,7 @@ public class Matcher { private final Map> values = new IdentityHashMap>(); private final Map> candidates = new IdentityHashMap>(); - + @Override public boolean add(Match match) { if (disjoint(match)) {