* reuse name normalization code

This commit is contained in:
Reinhard Pointner 2012-01-02 03:34:13 +00:00
parent b8c96b8fbe
commit 6707a94518
6 changed files with 61 additions and 35 deletions

View File

@ -36,12 +36,6 @@ public final class VerificationUtilities {
} }
/**
 * Removes every embedded checksum from the given string. A checksum is recognized as exactly
 * eight hexadecimal digits enclosed in an opening '(' or '[' and a closing ']' or ')'; the
 * brackets are removed together with the digits.
 *
 * @param string the text to strip checksums from
 * @return the text with all matches replaced by the empty string
 */
public static String removeEmbeddedChecksum(String string) {
// match embedded checksum and surrounding brackets
return string.replaceAll("[\\(\\[]\\p{XDigit}{8}[\\]\\)]", "");
}
public static String getHashFromVerificationFile(File file, HashType type, int maxDepth) throws IOException { public static String getHashFromVerificationFile(File file, HashType type, int maxDepth) throws IOException {
return getHashFromVerificationFile(file.getParentFile(), file, type, 0, maxDepth); return getHashFromVerificationFile(file.getParentFile(), file, type, 0, maxDepth);
} }

View File

@ -5,7 +5,7 @@ package net.sourceforge.filebot.similarity;
import static java.lang.Math.*; import static java.lang.Math.*;
import static java.util.Arrays.*; import static java.util.Arrays.*;
import static java.util.Collections.*; import static java.util.Collections.*;
import static net.sourceforge.filebot.hash.VerificationUtilities.*; import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.tuned.FileUtilities.*; import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File; import java.io.File;
@ -287,10 +287,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
name = removeEmbeddedChecksum(name); name = removeEmbeddedChecksum(name);
// remove/normalize special characters // remove/normalize special characters
name = name.replaceAll("['`´]+", ""); name = normalizePunctuation(name);
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
return name.trim().toLowerCase(); return name.toLowerCase();
} }

View File

@ -2,6 +2,7 @@
package net.sourceforge.filebot.similarity; package net.sourceforge.filebot.similarity;
import static net.sourceforge.filebot.similarity.Normalization.*;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3; import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3;
@ -29,10 +30,10 @@ public class NameSimilarityMetric implements SimilarityMetric {
String name = object.toString(); String name = object.toString();
// normalize separators // normalize separators
name = name.replaceAll("['`´]+", "").replaceAll("[\\p{Punct}\\p{Space}]+", " "); name = normalizePunctuation(name);
// normalize case and trim // normalize case and trim
return name.trim().toLowerCase(); return name.toLowerCase();
} }
} }

View File

@ -0,0 +1,31 @@
package net.sourceforge.filebot.similarity;
/**
 * Static helpers for normalizing names before they are compared.
 *
 * <p>All patterns are compiled once and shared; {@link java.util.regex.Pattern} instances are
 * immutable and safe to reuse across calls.
 */
public final class Normalization {

    /**
     * Runs of apostrophe-like characters: apostrophe, backtick and acute accent (U+00B4). The
     * acute accent is written as a Unicode escape so the pattern does not depend on the
     * source-file encoding used by the compiler.
     */
    private static final java.util.regex.Pattern APOSTROPHES =
            java.util.regex.Pattern.compile("['`\u00B4]+");

    /** Runs of ASCII punctuation and whitespace, which collapse into a single separator. */
    private static final java.util.regex.Pattern SEPARATORS =
            java.util.regex.Pattern.compile("[\\p{Punct}\\p{Space}]+");

    /** A '(' followed by any characters other than '(' up to a closing ')'. */
    private static final java.util.regex.Pattern ROUND_BRACKETS =
            java.util.regex.Pattern.compile("\\([^\\(]*\\)");

    /** A '[' followed by any characters other than '[' up to a closing ']'. */
    private static final java.util.regex.Pattern SQUARE_BRACKETS =
            java.util.regex.Pattern.compile("\\[[^\\[]*\\]");

    /** A '{' followed by any characters other than '{' up to a closing '}'. */
    private static final java.util.regex.Pattern CURLY_BRACKETS =
            java.util.regex.Pattern.compile("\\{[^\\{]*\\}");

    /** Exactly eight hex digits wrapped in '(' or '[' and ']' or ')'. */
    private static final java.util.regex.Pattern EMBEDDED_CHECKSUM =
            java.util.regex.Pattern.compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");

    /** Not instantiable: this class only hosts static helpers. */
    private Normalization() {
    }

    /**
     * Removes apostrophe-like characters and collapses every run of punctuation and whitespace
     * into a single space, then trims the result.
     *
     * @param name the text to normalize; must not be {@code null}
     * @return the normalized text without leading or trailing whitespace
     */
    public static String normalizePunctuation(String name) {
        // remove/normalize special characters
        String withoutApostrophes = APOSTROPHES.matcher(name).replaceAll("");
        String singleSpaced = SEPARATORS.matcher(withoutApostrophes).replaceAll(" ");
        return singleSpaced.trim();
    }

    /**
     * Replaces bracketed sections such as group names and checksums with a single space. Round,
     * square and curly brackets are processed in that order.
     *
     * <p>A match never spans another opening bracket of the same kind, but it may extend to a
     * later closing bracket of that kind if the text contains a stray one.
     *
     * @param name the text to process; must not be {@code null}
     * @return the text with each bracketed section replaced by one space (not trimmed)
     */
    public static String normalizeBrackets(String name) {
        // remove group names and checksums, any [...] or (...)
        String result = ROUND_BRACKETS.matcher(name).replaceAll(" ");
        result = SQUARE_BRACKETS.matcher(result).replaceAll(" ");
        result = CURLY_BRACKETS.matcher(result).replaceAll(" ");
        return result;
    }

    /**
     * Removes every embedded checksum, i.e. eight hexadecimal digits together with the round or
     * square brackets surrounding them.
     *
     * @param string the text to process; must not be {@code null}
     * @return the text with all embedded checksums removed
     */
    public static String removeEmbeddedChecksum(String string) {
        // match embedded checksum and surrounding brackets
        return EMBEDDED_CHECKSUM.matcher(string).replaceAll("");
    }
}

View File

@ -2,6 +2,9 @@
package net.sourceforge.filebot.similarity; package net.sourceforge.filebot.similarity;
import static net.sourceforge.filebot.similarity.Normalization.*;
public class SubstringMetric implements SimilarityMetric { public class SubstringMetric implements SimilarityMetric {
@Override @Override
@ -23,7 +26,7 @@ public class SubstringMetric implements SimilarityMetric {
String name = object.toString(); String name = object.toString();
// normalize separators // normalize separators
name = name.replaceAll("['`´]+", "").replaceAll("[\\p{Punct}\\p{Space}]+", " "); name = normalizePunctuation(name);
// normalize case and trim // normalize case and trim
return name.trim().toLowerCase(); return name.trim().toLowerCase();

View File

@ -3,16 +3,17 @@ package net.sourceforge.filebot.web;
import static java.util.Collections.*; import static java.util.Collections.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import java.util.AbstractList; import java.util.AbstractList;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
@ -123,10 +124,7 @@ class LocalSearch<T> {
protected String normalize(String value) { protected String normalize(String value) {
// normalize separator, normalize case and trim // normalize separator, normalize case and trim
value = value.replaceAll("['`´]+", ""); return normalizePunctuation(value).toLowerCase();
value = value.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
return value.trim().toLowerCase();
} }
} }