* much faster matching for lots of files/episodes

This commit is contained in:
Reinhard Pointner 2009-07-24 20:38:47 +00:00
parent a860a6ab5d
commit 6d8c82df90
2 changed files with 46 additions and 35 deletions

View File

@ -27,18 +27,6 @@ public class Match<Value, Candidate> {
} }
/**
* Check whether this match and the given match share neither their value nor their
* candidate. This method uses an <b>identity equality test</b> (<code>!=</code> on the
* references), not <code>equals</code>.
*
* @param match the match to compare against
* @return Returns <code>true</code> if both the value and the candidate of the specified
*         match are different objects than the value and the candidate of this match,
*         i.e. the two matches have nothing in common.
*/
public boolean disjoint(Match<?, ?> match) {
return (value != match.value && candidate != match.candidate);
}
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (obj instanceof Match) { if (obj instanceof Match) {

View File

@ -6,11 +6,14 @@ import java.util.AbstractList;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap; import java.util.IdentityHashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.SortedMap; import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
@ -36,7 +39,6 @@ public class Matcher<V, C> {
public synchronized List<Match<V, C>> match() throws InterruptedException { public synchronized List<Match<V, C>> match() throws InterruptedException {
// list of all combinations of values and candidates // list of all combinations of values and candidates
List<Match<V, C>> possibleMatches = new ArrayList<Match<V, C>>(values.size() * candidates.size()); List<Match<V, C>> possibleMatches = new ArrayList<Match<V, C>>(values.size() * candidates.size());
@ -91,7 +93,7 @@ public class Matcher<V, C> {
return; return;
} }
for (List<Match<V, C>> matchesWithEqualSimilarity : mapBySimilarity(possibleMatches, metrics[level]).values()) { for (Set<Match<V, C>> matchesWithEqualSimilarity : mapBySimilarity(possibleMatches, metrics[level]).values()) {
// some matches may already be unique // some matches may already be unique
List<Match<V, C>> disjointMatches = disjointMatches(matchesWithEqualSimilarity); List<Match<V, C>> disjointMatches = disjointMatches(matchesWithEqualSimilarity);
@ -120,22 +122,22 @@ public class Matcher<V, C> {
} }
protected SortedMap<Float, List<Match<V, C>>> mapBySimilarity(Collection<Match<V, C>> possibleMatches, SimilarityMetric metric) throws InterruptedException { protected SortedMap<Float, Set<Match<V, C>>> mapBySimilarity(Collection<Match<V, C>> possibleMatches, SimilarityMetric metric) throws InterruptedException {
// map sorted by similarity descending // map sorted by similarity descending
SortedMap<Float, List<Match<V, C>>> similarityMap = new TreeMap<Float, List<Match<V, C>>>(Collections.reverseOrder()); SortedMap<Float, Set<Match<V, C>>> similarityMap = new TreeMap<Float, Set<Match<V, C>>>(Collections.reverseOrder());
// use metric on all matches // use metric on all matches
for (Match<V, C> possibleMatch : possibleMatches) { for (Match<V, C> possibleMatch : possibleMatches) {
float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate()); float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate());
List<Match<V, C>> list = similarityMap.get(similarity); Set<Match<V, C>> matchSet = similarityMap.get(similarity);
if (list == null) { if (matchSet == null) {
list = new ArrayList<Match<V, C>>(); matchSet = new LinkedHashSet<Match<V, C>>();
similarityMap.put(similarity, list); similarityMap.put(similarity, matchSet);
} }
list.add(possibleMatch); matchSet.add(possibleMatch);
// unwind this thread if we have been interrupted // unwind this thread if we have been interrupted
if (Thread.interrupted()) { if (Thread.interrupted()) {
@ -148,21 +150,42 @@ public class Matcher<V, C> {
protected List<Match<V, C>> disjointMatches(Collection<Match<V, C>> collection) { protected List<Match<V, C>> disjointMatches(Collection<Match<V, C>> collection) {
List<Match<V, C>> disjointMatches = new ArrayList<Match<V, C>>(); Map<V, List<Match<V, C>>> matchesByValue = new HashMap<V, List<Match<V, C>>>();
Map<C, List<Match<V, C>>> matchesByCandidate = new HashMap<C, List<Match<V, C>>>();
for (Match<V, C> m1 : collection) { // map matches by value and candidate respectively
boolean disjoint = true; for (Match<V, C> match : collection) {
List<Match<V, C>> matchListForValue = matchesByValue.get(match.getValue());
List<Match<V, C>> matchListForCandidate = matchesByCandidate.get(match.getCandidate());
for (Match<V, C> m2 : collection) { // create list if necessary
// ignore same element if (matchListForValue == null) {
if (m1 != m2 && !m1.disjoint(m2)) { matchListForValue = new ArrayList<Match<V, C>>();
disjoint = false; matchesByValue.put(match.getValue(), matchListForValue);
break;
}
} }
if (disjoint) { // create list if necessary
disjointMatches.add(m1); if (matchListForCandidate == null) {
matchListForCandidate = new ArrayList<Match<V, C>>();
matchesByCandidate.put(match.getCandidate(), matchListForCandidate);
}
// add match to both lists
matchListForValue.add(match);
matchListForCandidate.add(match);
}
// collect disjoint matches
List<Match<V, C>> disjointMatches = new ArrayList<Match<V, C>>();
for (Match<V, C> match : collection) {
List<Match<V, C>> matchListForValue = matchesByValue.get(match.getValue());
List<Match<V, C>> matchListForCandidate = matchesByCandidate.get(match.getCandidate());
// check if match is the only element in both lists
if (matchListForValue.size() == 1 && matchListForValue.equals(matchListForCandidate)) {
// match is disjoint :)
disjointMatches.add(matchListForValue.get(0));
} }
} }