* collapse every run of whitespace (\s+) in binding values into a single space character

This commit is contained in:
Reinhard Pointner 2014-06-25 10:28:15 +00:00
parent 5b1fe0a22f
commit 94a380384f
2 changed files with 12 additions and 4 deletions

View File

@ -1,5 +1,6 @@
package net.filebot.format;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.ExceptionUtilities.*;
import static net.filebot.util.FileUtilities.*;
import groovy.lang.GroovyClassLoader;
@ -198,7 +199,7 @@ public class ExpressionFormat extends Format {
protected Object normalizeBindingValue(Object value) {
// if the binding value is a String, remove illegal characters
if (value instanceof CharSequence) {
return replacePathSeparators(value.toString()).trim();
return replaceSpace(replacePathSeparators((CharSequence) value), " ").trim();
}
// if the binding value is an Object, just leave it

View File

@ -9,6 +9,9 @@ public class Normalization {
// one or more consecutive apostrophe-like characters: ' ` ´ ʻ
private static final Pattern apostrophe = compile("['`´ʻ]+");
// one or more consecutive punctuation (\p{Punct}) or whitespace (\p{Space}) characters
private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");
// one or more consecutive whitespace characters; used by replaceSpace
private static final Pattern space = compile("\\s+");
// a single ':', '?', '.' or '_'; normalizeSpace replaces each of these with a space
private static final Pattern spaceLikePunctuation = compile("[:?._]");
// a (...), [...] or {...} group whose content contains no further opening bracket of the same kind
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };
// a (...) group at the end of the input that does not begin at the very start of the input;
// group 1 captures the text between the parentheses
private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");
@ -31,12 +34,16 @@ public class Normalization {
}
/**
 * Replaces each ':', '?', '.' and '_' in {@code name} with a space, trims the result and
 * then replaces every run of whitespace with {@code replacement}.
 *
 * NOTE(review): this view is a diff whose +/- markers were lost; per the hunk line counts
 * the first return below is the removed line and the second is the added one.
 */
public static String normalizeSpace(String name, String replacement) {
// removed by this commit: String.replaceAll compiles both regular expressions on every call
return name.replaceAll("[:?._]", " ").trim().replaceAll("\\s+", replacement);
// added by this commit: same steps via the precompiled spaceLikePunctuation pattern and replaceSpace
return replaceSpace(spaceLikePunctuation.matcher(name).replaceAll(" ").trim(), replacement);
}
public static String removeEmbeddedChecksum(String string) {
/**
 * Substitutes {@code replacement} for every run of whitespace in {@code name}.
 *
 * @param name        text to process
 * @param replacement replacement string handed to the matcher's {@code replaceAll}, so
 *                    {@code $} and {@code \} keep their special meaning there
 * @return {@code name} with each match of the {@code space} pattern replaced
 */
public static String replaceSpace(String name, String replacement) {
	final String collapsed = space.matcher(name).replaceAll(replacement);
	return collapsed;
}
/**
 * Removes every match of the {@code checksum} pattern from {@code name}.
 *
 * NOTE(review): this view is a diff whose +/- markers were lost; the first return below is
 * the removed line (it still uses the former parameter name {@code string}) and the second
 * is the added one.
 */
public static String removeEmbeddedChecksum(String name) {
// match embedded checksum and surrounding brackets
// removed by this commit: referenced the parameter under its old name "string"
return checksum.matcher(string).replaceAll("");
// added by this commit: identical call after the parameter was renamed to "name"
return checksum.matcher(name).replaceAll("");
}
public static String removeTrailingBrackets(String name) {