* reuse name normalization code
This commit is contained in:
parent
b8c96b8fbe
commit
6707a94518
|
@ -36,12 +36,6 @@ public final class VerificationUtilities {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String removeEmbeddedChecksum(String string) {
|
|
||||||
// match embedded checksum and surrounding brackets
|
|
||||||
return string.replaceAll("[\\(\\[]\\p{XDigit}{8}[\\]\\)]", "");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static String getHashFromVerificationFile(File file, HashType type, int maxDepth) throws IOException {
|
public static String getHashFromVerificationFile(File file, HashType type, int maxDepth) throws IOException {
|
||||||
return getHashFromVerificationFile(file.getParentFile(), file, type, 0, maxDepth);
|
return getHashFromVerificationFile(file.getParentFile(), file, type, 0, maxDepth);
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@ package net.sourceforge.filebot.similarity;
|
||||||
import static java.lang.Math.*;
|
import static java.lang.Math.*;
|
||||||
import static java.util.Arrays.*;
|
import static java.util.Arrays.*;
|
||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
import static net.sourceforge.filebot.hash.VerificationUtilities.*;
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
import static net.sourceforge.tuned.FileUtilities.*;
|
import static net.sourceforge.tuned.FileUtilities.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -287,10 +287,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
name = removeEmbeddedChecksum(name);
|
name = removeEmbeddedChecksum(name);
|
||||||
|
|
||||||
// remove/normalize special characters
|
// remove/normalize special characters
|
||||||
name = name.replaceAll("['`´]+", "");
|
name = normalizePunctuation(name);
|
||||||
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
|
||||||
|
|
||||||
return name.trim().toLowerCase();
|
return name.toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
package net.sourceforge.filebot.similarity;
|
package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
||||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
||||||
import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3;
|
import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3;
|
||||||
|
@ -29,10 +30,10 @@ public class NameSimilarityMetric implements SimilarityMetric {
|
||||||
String name = object.toString();
|
String name = object.toString();
|
||||||
|
|
||||||
// normalize separators
|
// normalize separators
|
||||||
name = name.replaceAll("['`´]+", "").replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
name = normalizePunctuation(name);
|
||||||
|
|
||||||
// normalize case and trim
|
// normalize case and trim
|
||||||
return name.trim().toLowerCase();
|
return name.toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
|
||||||
|
package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Static helpers that normalize names before they are compared.
 *
 * <p>All regular expressions are compiled once and reused, instead of being
 * recompiled on every call as {@code String.replaceAll} would do.
 */
public final class Normalization {

    /** One or more apostrophe-like characters: straight quote, backtick, acute accent. */
    private static final java.util.regex.Pattern APOSTROPHES = java.util.regex.Pattern.compile("['`\u00B4]+");

    /** One or more consecutive punctuation or whitespace characters. */
    private static final java.util.regex.Pattern SEPARATORS = java.util.regex.Pattern.compile("[\\p{Punct}\\p{Space}]+");

    /** A {@code (...)} group whose content contains no further opening parenthesis. */
    private static final java.util.regex.Pattern ROUND_GROUP = java.util.regex.Pattern.compile("\\([^\\(]*\\)");

    /** A {@code [...]} group whose content contains no further opening square bracket. */
    private static final java.util.regex.Pattern SQUARE_GROUP = java.util.regex.Pattern.compile("\\[[^\\[]*\\]");

    /** A {@code {...}} group whose content contains no further opening curly brace. */
    private static final java.util.regex.Pattern CURLY_GROUP = java.util.regex.Pattern.compile("\\{[^\\{]*\\}");

    /** Exactly eight hex digits enclosed by {@code (} or {@code [} and {@code )} or {@code ]}. */
    private static final java.util.regex.Pattern EMBEDDED_CHECKSUM = java.util.regex.Pattern.compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");


    /** Static utility class; not meant to be instantiated. */
    private Normalization() {
        throw new UnsupportedOperationException();
    }


    /**
     * Removes apostrophe-like characters and collapses every run of punctuation
     * and whitespace into a single space.
     *
     * @param name the text to normalize, must not be {@code null}
     * @return the normalized text with leading and trailing whitespace trimmed
     */
    public static String normalizePunctuation(String name) {
        // drop apostrophes first so that "World's" becomes "Worlds" rather than "World s"
        name = APOSTROPHES.matcher(name).replaceAll("");
        name = SEPARATORS.matcher(name).replaceAll(" ");

        return name.trim();
    }


    /**
     * Replaces any {@code (...)}, {@code [...]} or {@code {...}} group (e.g. group
     * names and checksums) with a single space.
     *
     * @param name the text to process, must not be {@code null}
     * @return the text with bracketed groups replaced by a space; the result is not trimmed
     */
    public static String normalizeBrackets(String name) {
        // order matters only for mixed nesting; keep round, square, curly as before
        name = ROUND_GROUP.matcher(name).replaceAll(" ");
        name = SQUARE_GROUP.matcher(name).replaceAll(" ");
        name = CURLY_GROUP.matcher(name).replaceAll(" ");

        return name;
    }


    /**
     * Removes every embedded checksum, i.e. eight hex digits together with the
     * surrounding round or square brackets.
     *
     * @param string the text to process, must not be {@code null}
     * @return the text without embedded checksums; surrounding whitespace is left as-is
     */
    public static String removeEmbeddedChecksum(String string) {
        return EMBEDDED_CHECKSUM.matcher(string).replaceAll("");
    }

}
|
|
@ -2,6 +2,9 @@
|
||||||
package net.sourceforge.filebot.similarity;
|
package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
|
|
||||||
|
|
||||||
public class SubstringMetric implements SimilarityMetric {
|
public class SubstringMetric implements SimilarityMetric {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -23,7 +26,7 @@ public class SubstringMetric implements SimilarityMetric {
|
||||||
String name = object.toString();
|
String name = object.toString();
|
||||||
|
|
||||||
// normalize separators
|
// normalize separators
|
||||||
name = name.replaceAll("['`´]+", "").replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
name = normalizePunctuation(name);
|
||||||
|
|
||||||
// normalize case and trim
|
// normalize case and trim
|
||||||
return name.trim().toLowerCase();
|
return name.trim().toLowerCase();
|
||||||
|
|
|
@ -3,16 +3,17 @@ package net.sourceforge.filebot.web;
|
||||||
|
|
||||||
|
|
||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
|
|
||||||
import java.util.AbstractList;
|
import java.util.AbstractList;
|
||||||
|
import java.util.AbstractMap.SimpleEntry;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.AbstractMap.SimpleEntry;
|
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
|
@ -123,10 +124,7 @@ class LocalSearch<T> {
|
||||||
|
|
||||||
protected String normalize(String value) {
|
protected String normalize(String value) {
|
||||||
// normalize separator, normalize case and trim
|
// normalize separator, normalize case and trim
|
||||||
value = value.replaceAll("['`´]+", "");
|
return normalizePunctuation(value).toLowerCase();
|
||||||
value = value.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
|
||||||
|
|
||||||
return value.trim().toLowerCase();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue