Make patterns public
This commit is contained in:
parent
80f13040b3
commit
1564efc27c
|
@@ -8,13 +8,13 @@ import java.util.regex.Pattern;
|
|||
|
||||
public class Normalization {
|
||||
|
||||
// NOTE(review): this span reads like a rendered diff -- each lower-case private pattern appears to be
// the pre-commit form of the upper-case public constant that carries the same regex; confirm against the repository.
private static final Pattern apostrophe = compile("['`´‘’ʻ]+");
|
||||
private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+", UNICODE_CHARACTER_CLASS);
|
||||
private static final Pattern spaceLikePunctuation = compile("[:?._]");
|
||||
// One or more consecutive apostrophe-like characters (straight quote, backtick, acute accent, curly quotes, modifier turned comma).
public static final Pattern APOSTROPHE = compile("['`´‘’ʻ]+");
|
||||
// One or more consecutive \p{Punct} or \p{Space} characters; compiled with the UNICODE_CHARACTER_CLASS flag.
public static final Pattern PUNCTUATION_OR_SPACE = compile("[\\p{Punct}\\p{Space}]+", UNICODE_CHARACTER_CLASS);
|
||||
// Exactly one character out of ':', '?', '.' and '_'.
public static final Pattern WORD_SEPARATOR_PUNCTUATION = compile("[:?._]");
|
||||
|
||||
private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");
|
||||
private static final Pattern trailingPunctuation = compile("[!?.]+$");
|
||||
private static final Pattern checksum = compile("[\\(\\[](\\p{XDigit}{8})[\\]\\)]");
|
||||
// A "(...)" group anchored with $ whose opening parenthesis is not the first character of the input
// (negative lookbehind for ^); group 1 captures the text between the parentheses.
public static final Pattern TRAILING_PARENTHESIS = compile("(?<!^)[(]([^)]*)[)]$");
|
||||
// One or more of '!', '?' and '.' anchored with $.
public static final Pattern TRAILING_PUNCTUATION = compile("[!?.]+$");
|
||||
// Eight hex digits (\p{XDigit}) enclosed by an opening '(' or '[' and a closing ']' or ')'; group 1 captures
// the digits. The opening and closing bracket kinds are not required to match each other.
public static final Pattern EMBEDDED_CHECKSUM = compile("[\\(\\[](\\p{XDigit}{8})[\\]\\)]");
|
||||
|
||||
// Three patterns, one each for (...), [...] and {...} spans whose content holds no further opening bracket of the same kind.
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };
|
||||
|
||||
|
@@ -31,13 +31,13 @@ public class Normalization {
|
|||
}
|
||||
|
||||
/**
 * Strips a trailing run of '!', '?' or '.' characters from the given name and
 * then trims leading and trailing whitespace from the result.
 *
 * @param name the text to clean up
 * @return the name without the trailing punctuation run, trimmed
 */
public static String trimTrailingPunctuation(String name) {
|
||||
return trailingPunctuation.matcher(name).replaceAll("").trim();
|
||||
return TRAILING_PUNCTUATION.matcher(name).replaceAll("").trim();
|
||||
}
|
||||
|
||||
/**
 * Normalizes punctuation in the given name: apostrophe-like characters are deleted,
 * every run of punctuation and/or whitespace is replaced by a single space,
 * and the result is trimmed.
 *
 * @param name the text to normalize
 * @return the normalized, trimmed text
 */
public static String normalizePunctuation(String name) {
|
||||
// remove/normalize special characters
|
||||
name = apostrophe.matcher(name).replaceAll("");
|
||||
name = punctuation.matcher(name).replaceAll(" ");
|
||||
// apostrophes are dropped without leaving a gap, so the parts on either side are joined
name = APOSTROPHE.matcher(name).replaceAll("");
|
||||
// each run of punctuation/whitespace collapses into exactly one space
name = PUNCTUATION_OR_SPACE.matcher(name).replaceAll(" ");
|
||||
return name.trim();
|
||||
}
|
||||
|
||||
|
@@ -50,7 +50,7 @@ public class Normalization {
|
|||
}
|
||||
|
||||
/**
 * Replaces each ':', '?', '.' and '_' character in the name with a space, trims the result,
 * and hands it to {@code replaceSpace} together with the given replacement.
 * NOTE(review): the body of replaceSpace is not visible here; presumably it substitutes
 * whitespace with the replacement -- confirm.
 *
 * @param name        the text to normalize
 * @param replacement the value forwarded to {@code replaceSpace}
 * @return whatever {@code replaceSpace} returns for the pre-processed name
 */
public static String normalizeSpace(String name, String replacement) {
|
||||
return replaceSpace(spaceLikePunctuation.matcher(name).replaceAll(" ").trim(), replacement);
|
||||
return replaceSpace(WORD_SEPARATOR_PUNCTUATION.matcher(name).replaceAll(" ").trim(), replacement);
|
||||
}
|
||||
|
||||
public static String replaceSpace(String name, String replacement) {
|
||||
|
@@ -58,7 +58,7 @@ public class Normalization {
|
|||
}
|
||||
|
||||
public static String getEmbeddedChecksum(String name) {
|
||||
Matcher m = checksum.matcher(name);
|
||||
Matcher m = EMBEDDED_CHECKSUM.matcher(name);
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
|
@@ -67,12 +67,12 @@ public class Normalization {
|
|||
|
||||
/**
 * Deletes every bracketed eight-hex-digit checksum from the name, including the
 * enclosing '(' / '[' and ']' / ')' characters. The result is not trimmed.
 *
 * @param name the text that may contain embedded checksums
 * @return the name with all checksum matches removed
 */
public static String removeEmbeddedChecksum(String name) {
|
||||
// match embedded checksum and surrounding brackets
|
||||
return checksum.matcher(name).replaceAll("");
|
||||
return EMBEDDED_CHECKSUM.matcher(name).replaceAll("");
|
||||
}
|
||||
|
||||
/**
 * Removes a parenthesized group at the end of the name and trims the result.
 * A group whose opening parenthesis is the first character of the name is left in place.
 *
 * @param name the text to clean up
 * @return the name without the trailing "(...)" group, trimmed
 */
public static String removeTrailingBrackets(String name) {
|
||||
// remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
|
||||
return trailingParentheses.matcher(name).replaceAll("").trim();
|
||||
return TRAILING_PARENTHESIS.matcher(name).replaceAll("").trim();
|
||||
}
|
||||
|
||||
public static String truncateText(String title, int limit) {
|
||||
|
|
Loading…
Reference in New Issue