* high-performance locale-aware common-sequence-matching via CollationKey
* boost series name detection speed
This commit is contained in:
parent
b2681508ef
commit
0f2468fc5f
@ -102,7 +102,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||||||
int cws = 0; // common word sequence
|
int cws = 0; // common word sequence
|
||||||
double max = mediaFiles.size();
|
double max = mediaFiles.size();
|
||||||
|
|
||||||
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(getLenientCollator(locale));
|
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale);
|
||||||
Collection<String> cwsList = emptySet();
|
Collection<String> cwsList = emptySet();
|
||||||
if (max >= 5) {
|
if (max >= 5) {
|
||||||
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
|
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
|
||||||
|
@ -4,6 +4,7 @@ package net.sourceforge.filebot.media;
|
|||||||
|
|
||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
import static net.sourceforge.filebot.MediaTypes.*;
|
import static net.sourceforge.filebot.MediaTypes.*;
|
||||||
|
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
|
||||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
import static net.sourceforge.tuned.FileUtilities.*;
|
import static net.sourceforge.tuned.FileUtilities.*;
|
||||||
|
|
||||||
@ -11,6 +12,7 @@ import java.io.File;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.text.CollationKey;
|
||||||
import java.text.Collator;
|
import java.text.Collator;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
@ -34,6 +36,7 @@ import java.util.regex.Pattern;
|
|||||||
|
|
||||||
import net.sourceforge.filebot.MediaTypes;
|
import net.sourceforge.filebot.MediaTypes;
|
||||||
import net.sourceforge.filebot.WebServices;
|
import net.sourceforge.filebot.WebServices;
|
||||||
|
import net.sourceforge.filebot.similarity.CommonSequenceMatcher;
|
||||||
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
||||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||||
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
||||||
@ -144,22 +147,15 @@ public class MediaDetection {
|
|||||||
|
|
||||||
// cross-reference known series names against file structure
|
// cross-reference known series names against file structure
|
||||||
try {
|
try {
|
||||||
Set<String> folders = new LinkedHashSet<String>();
|
Set<String> filenames = new LinkedHashSet<String>();
|
||||||
for (File f : files) {
|
for (File f : files) {
|
||||||
for (int i = 0; i < 3 && f != null; i++, f = f.getParentFile()) {
|
for (int i = 0; i < 3 && f != null; i++, f = f.getParentFile()) {
|
||||||
if (i != 0) {
|
filenames.add(f.getName());
|
||||||
folders.add(f.getName());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// match known name from filename if there is not enough context for CWS matching
|
|
||||||
if (files.size() == 1) {
|
|
||||||
folders.add(files.iterator().next().getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
// match folder names against known series names
|
// match folder names against known series names
|
||||||
for (TheTVDBSearchResult match : matchSeriesByName(folders.toArray(new String[0]))) {
|
for (TheTVDBSearchResult match : matchSeriesByName(filenames.toArray(new String[0]))) {
|
||||||
names.put(match.getName().toLowerCase(), match.getName());
|
names.put(match.getName().toLowerCase(), match.getName());
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@ -167,8 +163,7 @@ public class MediaDetection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// match common word sequence and clean detected word sequence from unwanted elements
|
// match common word sequence and clean detected word sequence from unwanted elements
|
||||||
SeriesNameMatcher matcher = new SeriesNameMatcher(getLenientCollator(locale));
|
Collection<String> matches = new SeriesNameMatcher(locale).matchAll(files.toArray(new File[files.size()]));
|
||||||
Collection<String> matches = matcher.matchAll(files.toArray(new File[files.size()]));
|
|
||||||
try {
|
try {
|
||||||
matches = stripReleaseInfo(matches, true);
|
matches = stripReleaseInfo(matches, true);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@ -182,16 +177,27 @@ public class MediaDetection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static Collection<TheTVDBSearchResult> matchSeriesByName(String... names) throws Exception {
|
private static final HashMap<TheTVDBSearchResult, String> seriesNameIndex = new HashMap<TheTVDBSearchResult, String>(32768);
|
||||||
|
|
||||||
|
|
||||||
|
public static List<TheTVDBSearchResult> matchSeriesByName(String... names) throws Exception {
|
||||||
final HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(0);
|
final HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(0);
|
||||||
final Map<TheTVDBSearchResult, String> matchMap = new HashMap<TheTVDBSearchResult, String>();
|
final Map<TheTVDBSearchResult, String> matchMap = new HashMap<TheTVDBSearchResult, String>();
|
||||||
|
|
||||||
for (final TheTVDBSearchResult entry : releaseInfo.getSeriesList()) {
|
synchronized (seriesNameIndex) {
|
||||||
|
if (seriesNameIndex.isEmpty()) {
|
||||||
|
for (TheTVDBSearchResult entry : releaseInfo.getSeriesList()) {
|
||||||
|
seriesNameIndex.put(entry, nameMatcher.normalize(entry.getName()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Entry<TheTVDBSearchResult, String> it : seriesNameIndex.entrySet()) {
|
||||||
for (String name : names) {
|
for (String name : names) {
|
||||||
String identifier = nameMatcher.normalize(entry.getName());
|
String identifier = it.getValue();
|
||||||
String commonName = nameMatcher.matchByFirstCommonWordSequence(name, identifier);
|
String commonName = nameMatcher.matchFirstCommonSequence(name, identifier);
|
||||||
if (commonName != null && commonName.length() >= identifier.length()) {
|
if (commonName != null && commonName.length() >= identifier.length()) {
|
||||||
matchMap.put(entry, commonName);
|
matchMap.put(it.getKey(), commonName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -215,13 +221,13 @@ public class MediaDetection {
|
|||||||
final Map<AnidbSearchResult, String> matchMap = new HashMap<AnidbSearchResult, String>();
|
final Map<AnidbSearchResult, String> matchMap = new HashMap<AnidbSearchResult, String>();
|
||||||
|
|
||||||
for (final AnidbSearchResult entry : WebServices.AniDB.getAnimeTitles()) {
|
for (final AnidbSearchResult entry : WebServices.AniDB.getAnimeTitles()) {
|
||||||
for (String name : names) {
|
for (String identifier : new String[] { entry.getPrimaryTitle(), entry.getOfficialTitle("en") }) {
|
||||||
for (String identifier : new String[] { entry.getPrimaryTitle(), entry.getOfficialTitle("en") }) {
|
if (identifier == null || identifier.isEmpty())
|
||||||
if (identifier == null || identifier.isEmpty())
|
continue;
|
||||||
continue;
|
|
||||||
|
identifier = nameMatcher.normalize(identifier);
|
||||||
identifier = nameMatcher.normalize(entry.getName());
|
for (String name : names) {
|
||||||
String commonName = nameMatcher.matchByFirstCommonWordSequence(name, identifier);
|
String commonName = nameMatcher.matchFirstCommonSequence(name, identifier);
|
||||||
if (commonName != null && commonName.length() >= identifier.length()) {
|
if (commonName != null && commonName.length() >= identifier.length()) {
|
||||||
matchMap.put(entry, commonName);
|
matchMap.put(entry, commonName);
|
||||||
}
|
}
|
||||||
@ -302,10 +308,10 @@ public class MediaDetection {
|
|||||||
for (final Movie movie : releaseInfo.getMovieList()) {
|
for (final Movie movie : releaseInfo.getMovieList()) {
|
||||||
for (String name : files) {
|
for (String name : files) {
|
||||||
String movieIdentifier = movie.getName();
|
String movieIdentifier = movie.getName();
|
||||||
String commonName = nameMatcher.matchByFirstCommonWordSequence(name, movieIdentifier);
|
String commonName = nameMatcher.matchFirstCommonSequence(name, movieIdentifier);
|
||||||
if (commonName != null && commonName.length() >= movieIdentifier.length()) {
|
if (commonName != null && commonName.length() >= movieIdentifier.length()) {
|
||||||
String strictMovieIdentifier = movie.getName() + " " + movie.getYear();
|
String strictMovieIdentifier = movie.getName() + " " + movie.getYear();
|
||||||
String strictCommonName = nameMatcher.matchByFirstCommonWordSequence(name, strictMovieIdentifier);
|
String strictCommonName = nameMatcher.matchFirstCommonSequence(name, strictMovieIdentifier);
|
||||||
if (strictCommonName != null && strictCommonName.length() >= strictMovieIdentifier.length()) {
|
if (strictCommonName != null && strictCommonName.length() >= strictMovieIdentifier.length()) {
|
||||||
// prefer strict match
|
// prefer strict match
|
||||||
matchMap.put(movie, strictCommonName);
|
matchMap.put(movie, strictCommonName);
|
||||||
@ -453,38 +459,34 @@ public class MediaDetection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public static Comparator<String> getLenientCollator(Locale locale) {
|
|
||||||
// use a lenient collator: full decomposition with PRIMARY (least strict) comparison strength
|
|
||||||
final Collator collator = Collator.getInstance(locale);
|
|
||||||
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
|
||||||
collator.setStrength(Collator.PRIMARY);
|
|
||||||
|
|
||||||
return (Comparator) collator;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Heavy-duty name matcher used for matching a file to one or more movies (out of a list of ~50k)
|
* Heavy-duty name matcher used for matching a file to one or more movies (out of a list of ~50k)
|
||||||
*/
|
*/
|
||||||
private static class HighPerformanceMatcher extends SeriesNameMatcher {
|
private static class HighPerformanceMatcher extends CommonSequenceMatcher {
|
||||||
|
|
||||||
private static final Map<String, String> transformCache = synchronizedMap(new WeakHashMap<String, String>(65536));
|
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
|
||||||
|
|
||||||
|
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new WeakHashMap<String, CollationKey[]>(65536));
|
||||||
|
|
||||||
|
|
||||||
public HighPerformanceMatcher(int commonWordSequenceMaxStartIndex) {
|
public HighPerformanceMatcher(int commonWordSequenceMaxStartIndex) {
|
||||||
super(String.CASE_INSENSITIVE_ORDER, commonWordSequenceMaxStartIndex); // 3-4x faster than a Collator
|
super(collator, commonWordSequenceMaxStartIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String normalize(String source) {
|
protected CollationKey[] split(String sequence) {
|
||||||
String value = transformCache.get(source);
|
CollationKey[] value = transformCache.get(sequence);
|
||||||
if (value == null) {
|
if (value == null) {
|
||||||
value = normalizePunctuation(source); // only normalize punctuation, make sure we keep the year (important for movie matching)
|
value = super.split(normalize(sequence));
|
||||||
transformCache.put(source, value);
|
transformCache.put(sequence, value);
|
||||||
}
|
}
|
||||||
return transformCache.get(source);
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String normalize(String sequence) {
|
||||||
|
return normalizePunctuation(sequence).toLowerCase(); // only normalize punctuation, make sure we keep the year (important for movie matching)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,125 @@
|
|||||||
|
|
||||||
|
package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
|
import static java.util.Arrays.*;
|
||||||
|
import static java.util.Collections.*;
|
||||||
|
|
||||||
|
import java.text.CollationKey;
|
||||||
|
import java.text.Collator;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.WeakHashMap;
|
||||||
|
|
||||||
|
|
||||||
|
public class CommonSequenceMatcher {
|
||||||
|
|
||||||
|
public static Collator getLenientCollator(Locale locale) {
|
||||||
|
// use a lenient collator: full decomposition with PRIMARY (least strict) comparison strength
|
||||||
|
Collator collator = Collator.getInstance(locale);
|
||||||
|
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
||||||
|
collator.setStrength(Collator.PRIMARY);
|
||||||
|
return collator;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected final Collator collator;
|
||||||
|
protected final int commonSequenceMaxStartIndex;
|
||||||
|
|
||||||
|
|
||||||
|
public CommonSequenceMatcher(Collator collator, int commonSequenceMaxStartIndex) {
|
||||||
|
this.collator = collator;
|
||||||
|
this.commonSequenceMaxStartIndex = commonSequenceMaxStartIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Collator getCollator() {
|
||||||
|
return collator;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String matchFirstCommonSequence(String... names) {
|
||||||
|
CollationKey[] common = null;
|
||||||
|
|
||||||
|
for (String it : names) {
|
||||||
|
CollationKey[] words = split(it);
|
||||||
|
|
||||||
|
if (common == null) {
|
||||||
|
// initialize common with current word array
|
||||||
|
common = words;
|
||||||
|
} else {
|
||||||
|
// find common sequence
|
||||||
|
common = firstCommonSequence(common, words, commonSequenceMaxStartIndex);
|
||||||
|
|
||||||
|
if (common == null) {
|
||||||
|
// no common sequence
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (common == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return synth(common);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected String synth(CollationKey[] keys) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (CollationKey it : keys) {
|
||||||
|
if (sb.length() > 0) {
|
||||||
|
sb.append(' ');
|
||||||
|
}
|
||||||
|
sb.append(it.getSourceString());
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected CollationKey[] split(String sequence) {
|
||||||
|
return getCollationKeys(sequence.split("\\s+"));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new WeakHashMap<String, CollationKey>(256));
|
||||||
|
|
||||||
|
|
||||||
|
protected CollationKey[] getCollationKeys(String[] words) {
|
||||||
|
CollationKey[] keys = new CollationKey[words.length];
|
||||||
|
for (int i = 0; i < keys.length; i++) {
|
||||||
|
keys[i] = collationKeyDictionary.get(words[i]);
|
||||||
|
if (keys[i] == null) {
|
||||||
|
keys[i] = collator.getCollationKey(words[i]);
|
||||||
|
collationKeyDictionary.put(words[i], keys[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return keys;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected <E extends Comparable<E>> E[] firstCommonSequence(E[] seq1, E[] seq2, int maxStartIndex) {
|
||||||
|
for (int i = 0; i < seq1.length && i <= maxStartIndex; i++) {
|
||||||
|
for (int j = 0; j < seq2.length && j <= maxStartIndex; j++) {
|
||||||
|
// common sequence length
|
||||||
|
int len = 0;
|
||||||
|
|
||||||
|
// iterate over common sequence
|
||||||
|
while ((i + len < seq1.length) && (j + len < seq2.length) && (seq1[i + len].compareTo(seq2[j + len]) == 0)) {
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if a common sequence was found
|
||||||
|
if (len > 0) {
|
||||||
|
if (i == 0 && len == seq1.length)
|
||||||
|
return seq1;
|
||||||
|
|
||||||
|
return copyOfRange(seq1, i, i + len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// no intersection at all
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
@ -6,7 +6,7 @@ public class Normalization {
|
|||||||
|
|
||||||
public static String normalizePunctuation(String name) {
|
public static String normalizePunctuation(String name) {
|
||||||
// remove/normalize special characters
|
// remove/normalize special characters
|
||||||
name = name.replaceAll("['`´]+", "");
|
name = name.replaceAll("[`´‘’ʻ]+", "");
|
||||||
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
||||||
|
|
||||||
return name.trim();
|
return name.trim();
|
||||||
|
@ -3,15 +3,17 @@ package net.sourceforge.filebot.similarity;
|
|||||||
|
|
||||||
|
|
||||||
import static java.lang.Math.*;
|
import static java.lang.Math.*;
|
||||||
|
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
|
||||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
|
|
||||||
import java.text.Collator;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
|
|
||||||
public class SequenceMatchSimilarity implements SimilarityMetric {
|
public class SequenceMatchSimilarity implements SimilarityMetric {
|
||||||
|
|
||||||
|
private final CommonSequenceMatcher commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(Locale.ROOT), 10);
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
String s1 = normalize(o1);
|
String s1 = normalize(o1);
|
||||||
@ -39,20 +41,7 @@ public class SequenceMatchSimilarity implements SimilarityMetric {
|
|||||||
|
|
||||||
|
|
||||||
protected String match(String s1, String s2) {
|
protected String match(String s1, String s2) {
|
||||||
// use maximum strength collator by default
|
return commonSequenceMatcher.matchFirstCommonSequence(s1, s2);
|
||||||
Collator collator = Collator.getInstance(Locale.ROOT);
|
|
||||||
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
|
||||||
collator.setStrength(Collator.TERTIARY);
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
SeriesNameMatcher matcher = new SeriesNameMatcher((Comparator) collator, 10) {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String normalize(String name) {
|
|
||||||
return name; // assume normalization has been done, no need to do that here again
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
return matcher.matchByFirstCommonWordSequence(s1, s2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,12 @@ package net.sourceforge.filebot.similarity;
|
|||||||
|
|
||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
import static java.util.regex.Pattern.*;
|
import static java.util.regex.Pattern.*;
|
||||||
|
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
|
||||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
import static net.sourceforge.tuned.StringUtilities.*;
|
import static net.sourceforge.tuned.StringUtilities.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.text.CollationKey;
|
||||||
import java.util.AbstractCollection;
|
import java.util.AbstractCollection;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
@ -16,6 +18,7 @@ import java.util.Comparator;
|
|||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
@ -31,25 +34,25 @@ public class SeriesNameMatcher {
|
|||||||
|
|
||||||
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
|
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
|
||||||
protected DateMatcher dateMatcher = new DateMatcher();
|
protected DateMatcher dateMatcher = new DateMatcher();
|
||||||
|
|
||||||
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
||||||
|
|
||||||
protected int commonWordSequenceMaxStartIndex;
|
protected CommonSequenceMatcher commonSequenceMatcher;
|
||||||
protected Comparator<String> commonWordComparator;
|
|
||||||
|
|
||||||
|
|
||||||
public SeriesNameMatcher() {
|
public SeriesNameMatcher() {
|
||||||
this(String.CASE_INSENSITIVE_ORDER, 3);
|
this(Locale.ROOT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public SeriesNameMatcher(Comparator<String> comparator) {
|
public SeriesNameMatcher(Locale locale) {
|
||||||
this(comparator, 3);
|
commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3) {
|
||||||
}
|
|
||||||
|
@Override
|
||||||
|
protected CollationKey[] split(String sequence) {
|
||||||
public SeriesNameMatcher(Comparator<String> commonWordComparator, int commonWordSequenceMaxStartIndex) {
|
return super.split(normalize(sequence));
|
||||||
this.commonWordSequenceMaxStartIndex = commonWordSequenceMaxStartIndex;
|
}
|
||||||
this.commonWordComparator = commonWordComparator;
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -62,7 +65,7 @@ public class SeriesNameMatcher {
|
|||||||
String[] names = entry.getValue();
|
String[] names = entry.getValue();
|
||||||
|
|
||||||
for (String nameMatch : matchAll(names)) {
|
for (String nameMatch : matchAll(names)) {
|
||||||
String commonMatch = matchByFirstCommonWordSequence(nameMatch, parent);
|
String commonMatch = commonSequenceMatcher.matchFirstCommonSequence(nameMatch, parent);
|
||||||
float similarity = commonMatch == null ? 0 : nameSimilarityMetric.getSimilarity(commonMatch, nameMatch);
|
float similarity = commonMatch == null ? 0 : nameSimilarityMetric.getSimilarity(commonMatch, nameMatch);
|
||||||
|
|
||||||
// prefer common match, but only if it's very similar to the original match
|
// prefer common match, but only if it's very similar to the original match
|
||||||
@ -116,7 +119,9 @@ public class SeriesNameMatcher {
|
|||||||
* threshold
|
* threshold
|
||||||
*/
|
*/
|
||||||
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
|
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
|
||||||
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, commonWordComparator);
|
@SuppressWarnings("unchecked")
|
||||||
|
Comparator<String> wordComparator = (Comparator) commonSequenceMatcher.getCollator();
|
||||||
|
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, wordComparator);
|
||||||
|
|
||||||
for (String name : names) {
|
for (String name : names) {
|
||||||
// use normalized name
|
// use normalized name
|
||||||
@ -163,7 +168,7 @@ public class SeriesNameMatcher {
|
|||||||
return emptySet();
|
return emptySet();
|
||||||
}
|
}
|
||||||
|
|
||||||
String common = matchByFirstCommonWordSequence(names);
|
String common = commonSequenceMatcher.matchFirstCommonSequence(names);
|
||||||
|
|
||||||
if (common != null) {
|
if (common != null) {
|
||||||
// common word sequence found
|
// common word sequence found
|
||||||
@ -218,29 +223,7 @@ public class SeriesNameMatcher {
|
|||||||
throw new IllegalArgumentException("Can't match common sequence from less than two names");
|
throw new IllegalArgumentException("Can't match common sequence from less than two names");
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] common = null;
|
return commonSequenceMatcher.matchFirstCommonSequence(names);
|
||||||
|
|
||||||
for (String name : names) {
|
|
||||||
String[] words = normalize(name).split("\\s+");
|
|
||||||
|
|
||||||
if (common == null) {
|
|
||||||
// initialize common with current word array
|
|
||||||
common = words;
|
|
||||||
} else {
|
|
||||||
// find common sequence
|
|
||||||
common = firstCommonSequence(common, words, commonWordSequenceMaxStartIndex, commonWordComparator);
|
|
||||||
|
|
||||||
if (common == null) {
|
|
||||||
// no common sequence
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (common == null)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
return join(common, " ");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -180,13 +180,14 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
|||||||
|
|
||||||
// detect series names and create episode list fetch tasks
|
// detect series names and create episode list fetch tasks
|
||||||
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles, locale).entrySet()) {
|
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles, locale).entrySet()) {
|
||||||
List<List<File>> batchSets = new ArrayList<List<File>>();
|
final List<List<File>> batchSets = new ArrayList<List<File>>();
|
||||||
|
final Collection<String> queries = sameSeriesGroup.getValue();
|
||||||
|
|
||||||
if (sameSeriesGroup.getValue() != null && sameSeriesGroup.getValue().size() > 0) {
|
if (queries != null && queries.size() > 0) {
|
||||||
// handle series name batch set all at once
|
// handle series name batch set all at once -> only 1 batch set
|
||||||
batchSets.add(new ArrayList<File>(sameSeriesGroup.getKey()));
|
batchSets.add(new ArrayList<File>(sameSeriesGroup.getKey()));
|
||||||
} else {
|
} else {
|
||||||
// these files don't seem to belong to any series -> handle folder per folder
|
// these files don't seem to belong to any series -> handle folder per folder -> multiple batch sets
|
||||||
batchSets.addAll(mapByFolder(sameSeriesGroup.getKey()).values());
|
batchSets.addAll(mapByFolder(sameSeriesGroup.getKey()).values());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -195,7 +196,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<Match<File, ?>> call() throws Exception {
|
public List<Match<File, ?>> call() throws Exception {
|
||||||
return matchEpisodeSet(batchSet, sortOrder, locale, autodetection, parent);
|
return matchEpisodeSet(batchSet, queries, sortOrder, locale, autodetection, parent);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -246,23 +247,22 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<Match<File, ?>> matchEpisodeSet(final List<File> files, SortOrder sortOrder, Locale locale, boolean autodetection, Component parent) throws Exception {
|
public List<Match<File, ?>> matchEpisodeSet(final List<File> files, Collection<String> queries, SortOrder sortOrder, Locale locale, boolean autodetection, Component parent) throws Exception {
|
||||||
Set<Episode> episodes = emptySet();
|
Set<Episode> episodes = emptySet();
|
||||||
|
|
||||||
// detect series name and fetch episode list
|
// detect series name and fetch episode list
|
||||||
if (autodetection) {
|
if (autodetection) {
|
||||||
Collection<String> names = detectSeriesNames(files, locale);
|
if (queries != null && queries.size() > 0) {
|
||||||
if (names.size() > 0) {
|
|
||||||
// only allow one fetch session at a time so later requests can make use of cached results
|
// only allow one fetch session at a time so later requests can make use of cached results
|
||||||
synchronized (providerLock) {
|
synchronized (providerLock) {
|
||||||
episodes = fetchEpisodeSet(names, sortOrder, locale, parent);
|
episodes = fetchEpisodeSet(queries, sortOrder, locale, parent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// require user input if auto-detection has failed or has been disabled
|
// require user input if auto-detection has failed or has been disabled
|
||||||
if (episodes.isEmpty()) {
|
if (episodes.isEmpty()) {
|
||||||
String suggestion = new SeriesNameMatcher().matchByEpisodeIdentifier(getName(files.get(0)));
|
String suggestion = new SeriesNameMatcher(locale).matchByEpisodeIdentifier(getName(files.get(0)));
|
||||||
if (suggestion != null) {
|
if (suggestion != null) {
|
||||||
// clean media info / release group info / etc
|
// clean media info / release group info / etc
|
||||||
suggestion = stripReleaseInfo(suggestion);
|
suggestion = stripReleaseInfo(suggestion);
|
||||||
|
Loading…
Reference in New Issue
Block a user