Reinhard Pointner 2016-04-01 17:48:01 +00:00
parent cba0483703
commit 0ac8d2d6cc
11 changed files with 52 additions and 42 deletions

View File

@ -1,5 +1,7 @@
package net.filebot.archive;
import static net.filebot.util.StringUtilities.*;
import static java.nio.charset.StandardCharsets.*;
import static java.util.Arrays.*;
import static net.filebot.Logging.*;
@ -55,7 +57,7 @@ public class SevenZipExecutable implements ArchiveExtractor {
if (returnCode == 0) {
return output;
} else {
throw new IOException(String.format("%s failed with exit code %d: %s", get7zCommand(), returnCode, output.replaceAll("\\s+", " ").trim()));
throw new IOException(String.format("%s failed with exit code %d: %s", get7zCommand(), returnCode, SPACE.matcher(output).replaceAll(" ").trim()));
}
} catch (InterruptedException e) {
throw new IOException(String.format("%s timed out", get7zCommand()), e);

View File

@ -117,7 +117,10 @@ public class ExpressionFormatMethods {
* e.g. "Doctor Who" -> "Doctor_Who"
*/
public static String space(String self, String replacement) {
    // turn space-like punctuation (: ? . _) into blanks and drop leading / trailing blanks
    self = self.replaceAll("[:?._]", " ").trim();

    // substitute every whitespace run with the given replacement
    return Normalization.replaceSpace(self, replacement);
}
/**
@ -126,7 +129,7 @@ public class ExpressionFormatMethods {
* e.g. "Sissi: The Young Empress" -> "Sissi - The Young Empress"
*/
public static String colon(String self, String replacement) {
    // UNICODE_CHARACTER_CLASS makes \s match Unicode white space (e.g. U+00A0 no-break space),
    // so blanks of any kind around the colon are swallowed together with it
    return compile("\\s*[:]\\s*", UNICODE_CHARACTER_CLASS).matcher(self).replaceAll(replacement);
}
/**
@ -151,7 +154,7 @@ public class ExpressionFormatMethods {
}
/**
 * Rewrites a name that starts with an article (The, A, An; case-insensitive) followed by a
 * single white space character, using the given replacement template.
 *
 * @param self the name to rewrite
 * @param replacement regex replacement; {@code $1} is the article, {@code $2} the remainder
 * @return the rewritten and trimmed name, or the trimmed input if it has no leading article
 */
public static String sortName(String self, String replacement) {
    // UNICODE_CHARACTER_CLASS lets \s match Unicode white space such as U+00A0 as well
    return compile("^(The|A|An)\\s(.+)", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self).replaceFirst(replacement).trim();
}
public static String sortInitial(String self) {
@ -268,7 +271,7 @@ public class ExpressionFormatMethods {
}
/**
 * Replaces a trailing parenthesized group, together with the white space in front of it.
 *
 * @param self the text to process
 * @param replacement regex replacement; {@code $1} is the text between the parentheses
 * @return the processed and trimmed text
 */
public static String replaceTrailingBrackets(String self, String replacement) {
    // Unicode-aware \s is required here: trim() does not strip U+00A0, so a no-break space
    // in front of the brackets has to be consumed by the pattern itself
    return compile("\\s*[(]([^)]*)[)]$", UNICODE_CHARACTER_CLASS).matcher(self).replaceAll(replacement).trim();
}
/**
@ -285,7 +288,7 @@ public class ExpressionFormatMethods {
String[] patterns = new String[] { "\\s*[(](\\w+)[)]$", "\\W+Part (\\w+)\\W*$" };
for (String pattern : patterns) {
Matcher matcher = compile(pattern, CASE_INSENSITIVE).matcher(self);
Matcher matcher = compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self);
if (matcher.find()) {
return matcher.replaceAll(replacement).trim();
}

View File

@ -1350,10 +1350,9 @@ public class MediaDetection {
private static class HighPerformanceMatcher extends CommonSequenceMatcher {
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
private static final Pattern space = Pattern.compile("\\s+");
public static CollationKey[] prepare(String sequence) {
String[] words = space.split(sequence);
String[] words = SPACE.split(sequence);
CollationKey[] keys = new CollationKey[words.length];
for (int i = 0; i < words.length; i++) {
keys[i] = collator.getCollationKey(words[i]);

View File

@ -2,6 +2,7 @@ package net.filebot.similarity;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static net.filebot.util.StringUtilities.*;
import java.text.CollationKey;
import java.text.Collator;
@ -77,7 +78,7 @@ public class CommonSequenceMatcher {
}
/**
 * Splits the given sequence on white space runs and maps the resulting words to collation keys.
 *
 * @param sequence the text to tokenize
 * @return one collation key per word, as produced by {@code getCollationKeys}
 */
public CollationKey[] split(String sequence) {
    // use the shared SPACE pattern instead of compiling "\\s+" on every call
    return getCollationKeys(SPACE.split(sequence));
}
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new HashMap<String, CollationKey>(256));

View File

@ -1,22 +1,22 @@
package net.filebot.similarity;
import static java.util.regex.Pattern.*;
import static net.filebot.util.StringUtilities.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Normalization {
// runs of apostrophe-like characters
private static final Pattern apostrophe = compile("['`´ʻ]+");

// runs of punctuation and white space (Unicode-aware)
private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+", Pattern.UNICODE_CHARACTER_CLASS);

// ASCII-only white space runs; kept because statements in this class still reference it
private static final Pattern space = compile("\\s+");

// punctuation characters that are treated like a blank
private static final Pattern spaceLikePunctuation = compile("[:?._]");

// parenthesized group at the very end of the text (but not spanning the whole text)
private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");

// sentence punctuation at the very end of the text
private static final Pattern trailingPunctuation = compile("[!?.]+$");

// 8 hex digits enclosed in () or []; the digits MUST stay in capturing group 1
// because getEmbeddedChecksum reads them via group(1)
private static final Pattern checksum = compile("[\\(\\[](\\p{XDigit}{8})[\\]\\)]");

// (...), [...] and {...} groups
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };

// NOTE(review): the two names below look swapped (doubleQuotes lists apostrophe-like
// characters, singleQuotes lists double-quote characters) -- confirm against the usages before renaming
private static final char[] doubleQuotes = new char[] { '\'', '\u0060', '\u00b4', '\u2018', '\u2019', '\u02bb' };
private static final char[] singleQuotes = new char[] { '\"', '\u201c', '\u201d' };
@ -55,7 +55,15 @@ public class Normalization {
}
/**
 * Replaces every white space run in the given name with the replacement text.
 *
 * @param name the text to process
 * @param replacement regex replacement inserted for each white space run
 * @return the processed text
 */
public static String replaceSpace(String name, String replacement) {
    return SPACE.matcher(name).replaceAll(replacement);
}
/**
 * Searches the given name for the first occurrence of the checksum pattern.
 *
 * @param name the text to search
 * @return capturing group 1 of the first match, or {@code null} if the pattern does not match
 */
public static String getEmbeddedChecksum(String name) {
    Matcher found = checksum.matcher(name);
    return found.find() ? found.group(1) : null;
}
public static String removeEmbeddedChecksum(String name) {
@ -73,7 +81,7 @@ public class Normalization {
return title;
}
String[] words = space.split(title);
String[] words = SPACE.split(title);
StringBuilder s = new StringBuilder();
for (int i = 0; i < words.length && s.length() + words[i].length() < limit; i++) {

View File

@ -209,7 +209,7 @@ public class SeriesNameMatcher {
}
public String matchBySeparator(String name) {
Pattern separator = Pattern.compile("[\\s]+[-]+[\\s]+");
Pattern separator = Pattern.compile("[\\s]+[-]+[\\s]+", Pattern.UNICODE_CHARACTER_CLASS);
Matcher matcher = separator.matcher(name);
if (matcher.find() && matcher.start() > 0) {

View File

@ -9,6 +9,7 @@ import static net.filebot.Logging.*;
import static net.filebot.Settings.*;
import static net.filebot.UserFiles.*;
import static net.filebot.media.XattrMetaInfo.*;
import static net.filebot.util.StringUtilities.*;
import static net.filebot.util.ui.SwingUI.*;
import java.awt.Color;
@ -225,7 +226,7 @@ class HistoryDialog extends JDialog {
List<HistoryFilter> filterList = new ArrayList<HistoryFilter>();
// filter by all words
for (String word : filterEditor.getText().split("\\s+")) {
for (String word : SPACE.split(filterEditor.getText())) {
filterList.add(new HistoryFilter(word));
}

View File

@ -3,6 +3,7 @@ package net.filebot.ui.rename;
import static java.awt.Font.*;
import static javax.swing.BorderFactory.*;
import static net.filebot.Logging.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.ui.SwingUI.*;
import java.awt.Component;
@ -205,9 +206,10 @@ public class PresetEditor extends JDialog {
private RSyntaxTextArea createEditor() {
final RSyntaxTextArea editor = new RSyntaxTextArea(new RSyntaxDocument(SyntaxConstants.SYNTAX_STYLE_GROOVY) {
@Override
public void insertString(int offs, String str, AttributeSet a) throws BadLocationException {
    // white space runs (line breaks included) become a single blank before the text is inserted
    super.insertString(offs, replaceSpace(str, " "), a); // FORCE SINGLE LINE
}
}, null, 1, 80);

View File

@ -3,6 +3,7 @@ package net.filebot.ui.subtitle;
import static java.awt.Font.*;
import static java.util.Collections.*;
import static java.util.regex.Pattern.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.ui.SwingUI.*;
import java.awt.Color;
@ -130,15 +131,12 @@ public class SubtitleViewer extends JFrame {
@Override
public Component getTableCellRendererComponent(JTable table, Object value, boolean isSelected, boolean hasFocus, int row, int column) {
    // render the cell text with every white space run collapsed into a single blank
    String text = replaceSpace(value.toString(), " ");
    return super.getTableCellRendererComponent(table, text, isSelected, hasFocus, row, column);
}
});
// focus around selected time stamp
installAction(table, KeyStroke.getKeyStroke(KeyEvent.VK_ENTER, 0), new AbstractAction("focus") {
@Override
public void actionPerformed(ActionEvent e) {
installAction(table, KeyStroke.getKeyStroke(KeyEvent.VK_ENTER, 0), newAction("focus", evt -> {
// disable row filter
setTableFilter(null);
@ -146,18 +144,13 @@ public class SubtitleViewer extends JFrame {
Rectangle focus = table.getCellRect(Math.max(table.getSelectedRow() - 7, 0), 0, true);
focus.height = table.getSize().height;
table.scrollRectToVisible(focus);
}
});
}));
table.addMouseListener(new MouseInputAdapter() {
@Override
public void mouseClicked(MouseEvent e) {
if (SwingUtilities.isLeftMouseButton(e) && e.getClickCount() == 2) {
table.addMouseListener(mouseClicked(evt -> {
if (SwingUtilities.isLeftMouseButton(evt) && evt.getClickCount() == 2) {
table.getActionMap().get("focus").actionPerformed(null);
}
}
});
}));
return table;
}

View File

@ -4,6 +4,7 @@ import static java.nio.charset.StandardCharsets.*;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static net.filebot.Logging.*;
import static net.filebot.util.StringUtilities.*;
import java.io.BufferedInputStream;
import java.io.File;
@ -571,7 +572,7 @@ public final class FileUtilities {
*/
public static String validateFileName(CharSequence filename) {
    // strip invalid characters from file name
    String stripped = ILLEGAL_CHARACTERS.matcher(filename).replaceAll("");

    // collapse white space runs into a single blank, then drop leading / trailing blanks
    return SPACE.matcher(stripped).replaceAll(" ").trim();
}
public static boolean isInvalidFileName(CharSequence filename) {

View File

@ -14,10 +14,10 @@ import java.util.stream.Stream;
public final class StringUtilities {
// one or more digits
public static final Pattern DIGIT = Pattern.compile("\\d+");
// one or more non-digit characters
public static final Pattern NON_DIGIT = Pattern.compile("\\D+");
// the literal pipe character (LITERAL, so "|" is not treated as regex alternation)
public static final Pattern PIPE = Pattern.compile("|", Pattern.LITERAL);
// one or more white space characters; Unicode-aware so that it also matches e.g. French No-Break Space U+00A0
public static final Pattern SPACE = Pattern.compile("\\s+", Pattern.UNICODE_CHARACTER_CLASS);
public static List<Integer> matchIntegers(CharSequence s) {
if (s == null || s.length() == 0) {