Make patterns public
This commit is contained in:
parent
80f13040b3
commit
1564efc27c
|
@ -8,13 +8,13 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class Normalization {
|
public class Normalization {
|
||||||
|
|
||||||
private static final Pattern apostrophe = compile("['`´‘’ʻ]+");
|
public static final Pattern APOSTROPHE = compile("['`´‘’ʻ]+");
|
||||||
private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+", UNICODE_CHARACTER_CLASS);
|
public static final Pattern PUNCTUATION_OR_SPACE = compile("[\\p{Punct}\\p{Space}]+", UNICODE_CHARACTER_CLASS);
|
||||||
private static final Pattern spaceLikePunctuation = compile("[:?._]");
|
public static final Pattern WORD_SEPARATOR_PUNCTUATION = compile("[:?._]");
|
||||||
|
|
||||||
private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");
|
public static final Pattern TRAILING_PARENTHESIS = compile("(?<!^)[(]([^)]*)[)]$");
|
||||||
private static final Pattern trailingPunctuation = compile("[!?.]+$");
|
public static final Pattern TRAILING_PUNCTUATION = compile("[!?.]+$");
|
||||||
private static final Pattern checksum = compile("[\\(\\[](\\p{XDigit}{8})[\\]\\)]");
|
public static final Pattern EMBEDDED_CHECKSUM = compile("[\\(\\[](\\p{XDigit}{8})[\\]\\)]");
|
||||||
|
|
||||||
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };
|
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };
|
||||||
|
|
||||||
|
@ -31,13 +31,13 @@ public class Normalization {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String trimTrailingPunctuation(String name) {
|
public static String trimTrailingPunctuation(String name) {
|
||||||
return trailingPunctuation.matcher(name).replaceAll("").trim();
|
return TRAILING_PUNCTUATION.matcher(name).replaceAll("").trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String normalizePunctuation(String name) {
|
public static String normalizePunctuation(String name) {
|
||||||
// remove/normalize special characters
|
// remove/normalize special characters
|
||||||
name = apostrophe.matcher(name).replaceAll("");
|
name = APOSTROPHE.matcher(name).replaceAll("");
|
||||||
name = punctuation.matcher(name).replaceAll(" ");
|
name = PUNCTUATION_OR_SPACE.matcher(name).replaceAll(" ");
|
||||||
return name.trim();
|
return name.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ public class Normalization {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String normalizeSpace(String name, String replacement) {
|
public static String normalizeSpace(String name, String replacement) {
|
||||||
return replaceSpace(spaceLikePunctuation.matcher(name).replaceAll(" ").trim(), replacement);
|
return replaceSpace(WORD_SEPARATOR_PUNCTUATION.matcher(name).replaceAll(" ").trim(), replacement);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String replaceSpace(String name, String replacement) {
|
public static String replaceSpace(String name, String replacement) {
|
||||||
|
@ -58,7 +58,7 @@ public class Normalization {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getEmbeddedChecksum(String name) {
|
public static String getEmbeddedChecksum(String name) {
|
||||||
Matcher m = checksum.matcher(name);
|
Matcher m = EMBEDDED_CHECKSUM.matcher(name);
|
||||||
if (m.find()) {
|
if (m.find()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
@ -67,12 +67,12 @@ public class Normalization {
|
||||||
|
|
||||||
public static String removeEmbeddedChecksum(String name) {
|
public static String removeEmbeddedChecksum(String name) {
|
||||||
// match embedded checksum and surrounding brackets
|
// match embedded checksum and surrounding brackets
|
||||||
return checksum.matcher(name).replaceAll("");
|
return EMBEDDED_CHECKSUM.matcher(name).replaceAll("");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String removeTrailingBrackets(String name) {
|
public static String removeTrailingBrackets(String name) {
|
||||||
// remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
|
// remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
|
||||||
return trailingParentheses.matcher(name).replaceAll("").trim();
|
return TRAILING_PARENTHESIS.matcher(name).replaceAll("").trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String truncateText(String title, int limit) {
|
public static String truncateText(String title, int limit) {
|
||||||
|
|
Loading…
Reference in New Issue