* much faster matching for lots of files/episodes
This commit is contained in:
parent
a860a6ab5d
commit
6d8c82df90
|
@ -27,18 +27,6 @@ public class Match<Value, Candidate> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if the given match shares neither its value nor its candidate with this match. This method uses an
|
|
||||||
* <b>identity equality test</b>.
|
|
||||||
*
|
|
||||||
* @param match a match
|
|
||||||
* @return <code>true</code> if the specified match has neither the value nor the candidate in common with this match.
|
|
||||||
*/
|
|
||||||
public boolean disjoint(Match<?, ?> match) {
|
|
||||||
return (value != match.value && candidate != match.candidate);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (obj instanceof Match) {
|
if (obj instanceof Match) {
|
||||||
|
|
|
@ -6,11 +6,14 @@ import java.util.AbstractList;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.IdentityHashMap;
|
import java.util.IdentityHashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.SortedMap;
|
import java.util.SortedMap;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
@ -36,7 +39,6 @@ public class Matcher<V, C> {
|
||||||
|
|
||||||
|
|
||||||
public synchronized List<Match<V, C>> match() throws InterruptedException {
|
public synchronized List<Match<V, C>> match() throws InterruptedException {
|
||||||
|
|
||||||
// list of all combinations of values and candidates
|
// list of all combinations of values and candidates
|
||||||
List<Match<V, C>> possibleMatches = new ArrayList<Match<V, C>>(values.size() * candidates.size());
|
List<Match<V, C>> possibleMatches = new ArrayList<Match<V, C>>(values.size() * candidates.size());
|
||||||
|
|
||||||
|
@ -91,7 +93,7 @@ public class Matcher<V, C> {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (List<Match<V, C>> matchesWithEqualSimilarity : mapBySimilarity(possibleMatches, metrics[level]).values()) {
|
for (Set<Match<V, C>> matchesWithEqualSimilarity : mapBySimilarity(possibleMatches, metrics[level]).values()) {
|
||||||
// some matches may already be unique
|
// some matches may already be unique
|
||||||
List<Match<V, C>> disjointMatches = disjointMatches(matchesWithEqualSimilarity);
|
List<Match<V, C>> disjointMatches = disjointMatches(matchesWithEqualSimilarity);
|
||||||
|
|
||||||
|
@ -120,22 +122,22 @@ public class Matcher<V, C> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected SortedMap<Float, List<Match<V, C>>> mapBySimilarity(Collection<Match<V, C>> possibleMatches, SimilarityMetric metric) throws InterruptedException {
|
protected SortedMap<Float, Set<Match<V, C>>> mapBySimilarity(Collection<Match<V, C>> possibleMatches, SimilarityMetric metric) throws InterruptedException {
|
||||||
// map sorted by similarity descending
|
// map sorted by similarity descending
|
||||||
SortedMap<Float, List<Match<V, C>>> similarityMap = new TreeMap<Float, List<Match<V, C>>>(Collections.reverseOrder());
|
SortedMap<Float, Set<Match<V, C>>> similarityMap = new TreeMap<Float, Set<Match<V, C>>>(Collections.reverseOrder());
|
||||||
|
|
||||||
// use metric on all matches
|
// use metric on all matches
|
||||||
for (Match<V, C> possibleMatch : possibleMatches) {
|
for (Match<V, C> possibleMatch : possibleMatches) {
|
||||||
float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate());
|
float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate());
|
||||||
|
|
||||||
List<Match<V, C>> list = similarityMap.get(similarity);
|
Set<Match<V, C>> matchSet = similarityMap.get(similarity);
|
||||||
|
|
||||||
if (list == null) {
|
if (matchSet == null) {
|
||||||
list = new ArrayList<Match<V, C>>();
|
matchSet = new LinkedHashSet<Match<V, C>>();
|
||||||
similarityMap.put(similarity, list);
|
similarityMap.put(similarity, matchSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
list.add(possibleMatch);
|
matchSet.add(possibleMatch);
|
||||||
|
|
||||||
// unwind this thread if we have been interrupted
|
// unwind this thread if we have been interrupted
|
||||||
if (Thread.interrupted()) {
|
if (Thread.interrupted()) {
|
||||||
|
@ -148,21 +150,42 @@ public class Matcher<V, C> {
|
||||||
|
|
||||||
|
|
||||||
protected List<Match<V, C>> disjointMatches(Collection<Match<V, C>> collection) {
|
protected List<Match<V, C>> disjointMatches(Collection<Match<V, C>> collection) {
|
||||||
|
Map<V, List<Match<V, C>>> matchesByValue = new HashMap<V, List<Match<V, C>>>();
|
||||||
|
Map<C, List<Match<V, C>>> matchesByCandidate = new HashMap<C, List<Match<V, C>>>();
|
||||||
|
|
||||||
|
// map matches by value and candidate respectively
|
||||||
|
for (Match<V, C> match : collection) {
|
||||||
|
List<Match<V, C>> matchListForValue = matchesByValue.get(match.getValue());
|
||||||
|
List<Match<V, C>> matchListForCandidate = matchesByCandidate.get(match.getCandidate());
|
||||||
|
|
||||||
|
// create list if necessary
|
||||||
|
if (matchListForValue == null) {
|
||||||
|
matchListForValue = new ArrayList<Match<V, C>>();
|
||||||
|
matchesByValue.put(match.getValue(), matchListForValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// create list if necessary
|
||||||
|
if (matchListForCandidate == null) {
|
||||||
|
matchListForCandidate = new ArrayList<Match<V, C>>();
|
||||||
|
matchesByCandidate.put(match.getCandidate(), matchListForCandidate);
|
||||||
|
}
|
||||||
|
|
||||||
|
// add match to both lists
|
||||||
|
matchListForValue.add(match);
|
||||||
|
matchListForCandidate.add(match);
|
||||||
|
}
|
||||||
|
|
||||||
|
// collect disjoint matches
|
||||||
List<Match<V, C>> disjointMatches = new ArrayList<Match<V, C>>();
|
List<Match<V, C>> disjointMatches = new ArrayList<Match<V, C>>();
|
||||||
|
|
||||||
for (Match<V, C> m1 : collection) {
|
for (Match<V, C> match : collection) {
|
||||||
boolean disjoint = true;
|
List<Match<V, C>> matchListForValue = matchesByValue.get(match.getValue());
|
||||||
|
List<Match<V, C>> matchListForCandidate = matchesByCandidate.get(match.getCandidate());
|
||||||
|
|
||||||
for (Match<V, C> m2 : collection) {
|
// check if match is the only element in both lists
|
||||||
// ignore same element
|
if (matchListForValue.size() == 1 && matchListForValue.equals(matchListForCandidate)) {
|
||||||
if (m1 != m2 && !m1.disjoint(m2)) {
|
// match is disjoint :)
|
||||||
disjoint = false;
|
disjointMatches.add(matchListForValue.get(0));
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (disjoint) {
|
|
||||||
disjointMatches.add(m1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue