+ String.asciiQuotes() to normalize weird quotation marks (e.g. "\u00b4\u2018\u2019\u02bb".asciiQuotes() == "''''")
This commit is contained in:
parent
5c1dac0533
commit
8a77762e34
|
@ -12,6 +12,7 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import net.filebot.similarity.Normalization;
|
||||
import net.filebot.util.FileUtilities;
|
||||
|
||||
import com.ibm.icu.text.Transliterator;
|
||||
|
@ -254,6 +255,10 @@ public class ExpressionFormatMethods {
|
|||
return Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove").transform(self).replaceAll("[^\\p{ASCII}]+", fallback).trim();
|
||||
}
|
||||
|
||||
public static String asciiQuotes(String self) {
|
||||
return Normalization.normalizeQuotationMarks(self);
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace multiple replacement pairs
|
||||
*
|
||||
|
|
|
@ -17,6 +17,18 @@ public class Normalization {
|
|||
|
||||
private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");
|
||||
|
||||
private static final char[] doubleQuotes = new char[] { '\"', '\u0060', '\u00b4', '\u2018', '\u2019', '\u02bb' };
|
||||
private static final char[] singleQuotes = new char[] { '\'', '\u201c', '\u201d' };
|
||||
|
||||
public static String normalizeQuotationMarks(String name) {
|
||||
for (char[] cs : new char[][] { doubleQuotes, singleQuotes }) {
|
||||
for (char c : cs) {
|
||||
name = name.replace(c, cs[0]);
|
||||
}
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
public static String normalizePunctuation(String name) {
|
||||
// remove/normalize special characters
|
||||
name = apostrophe.matcher(name).replaceAll("");
|
||||
|
|
Loading…
Reference in New Issue