From 5bf208fadbd390fa9dbe502d8e85c3c6fc7dd9b3 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Wed, 9 Nov 2016 23:02:25 +0800 Subject: [PATCH] Refactor Normalization --- .../net/filebot/format/MediaBindingBean.java | 23 +++++++++++-------- .../net/filebot/similarity/Normalization.java | 2 +- source/net/filebot/util/StringUtilities.java | 8 +++++++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/source/net/filebot/format/MediaBindingBean.java b/source/net/filebot/format/MediaBindingBean.java index dad9101f..bbbd9441 100644 --- a/source/net/filebot/format/MediaBindingBean.java +++ b/source/net/filebot/format/MediaBindingBean.java @@ -34,7 +34,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; -import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -308,7 +307,7 @@ public class MediaBindingBean { String codec = getMediaInfo(StreamKind.Video, 0, "Encoded_Library_Name", "Encoded_Library/Name", "CodecID/Hint", "Format"); // get first token (e.g. DivX 5 => DivX) - return SPACE.splitAsStream(codec).findFirst().get(); + return tokenize(codec).findFirst().get(); } @Define("ac") @@ -317,7 +316,7 @@ public class MediaBindingBean { String codec = getMediaInfo(StreamKind.Audio, 0, "CodecID/Hint", "Format"); // remove punctuation (e.g. AC-3 => AC3) - return PUNCTUATION_OR_SPACE.matcher(codec).replaceAll(""); + return normalizePunctuation(codec, "", ""); } @Define("cf") @@ -326,7 +325,7 @@ public class MediaBindingBean { String extensions = getMediaInfo(StreamKind.General, 0, "Codec/Extensions", "Format"); // get first extension - return SPACE.splitAsStream(extensions).findFirst().get().toLowerCase(); + return tokenize(extensions).map(String::toLowerCase).findFirst().get(); } @Define("vf") @@ -334,19 +333,23 @@ public class MediaBindingBean { int width = Integer.parseInt(getMediaInfo(StreamKind.Video, 0, "Width")); int height = Integer.parseInt(getMediaInfo(StreamKind.Video, 0, "Height")); - int ns = 0; int[] ws = new int[] { 15360, 7680, 3840, 1920, 1280, 1024, 854, 852, 720, 688, 512, 320 }; int[] hs = new int[] { 8640, 4320, 2160, 1080, 720, 576, 576, 480, 480, 360, 240, 240 }; + + int ns = 0; + for (int i = 0; i < ws.length - 1; i++) { if ((width >= ws[i] || height >= hs[i]) || (width > ws[i + 1] && height > hs[i + 1])) { ns = hs[i]; break; } } + if (ns > 0) { // e.g. 720p, nobody actually wants files to be tagged as interlaced, e.g. 720i return String.format("%dp", ns); } + return null; // video too small } @@ -364,7 +367,7 @@ public class MediaBindingBean { String channels = getMediaInfo(StreamKind.Audio, 0, "Channel(s)_Original", "Channel(s)"); // get first number, e.g. 6ch - return SPACE.splitAsStream(channels).filter(DIGIT.asPredicate()).findFirst().get() + "ch"; + return String.format("%dch", matchInteger(channels)); } @Define("channels") @@ -372,9 +375,9 @@ public class MediaBindingBean { String channels = getMediaInfo(StreamKind.Audio, 0, "ChannelPositions/String2", "Channel(s)_Original", "Channel(s)"); // e.g. ChannelPositions/String2: 3/2/2.1 / 3/2/0.1 (one audio stream may contain multiple multi-channel streams) - double d = SPACE.splitAsStream(channels).mapToDouble(s -> { + double d = tokenize(channels).mapToDouble(s -> { try { - return SLASH.splitAsStream(s).mapToDouble(Double::parseDouble).reduce(0, (a, b) -> a + b); + return tokenize(s, SLASH).mapToDouble(Double::parseDouble).reduce(0, (a, b) -> a + b); } catch (NumberFormatException e) { return 0; } @@ -492,7 +495,7 @@ public class MediaBindingBean { // heavy normalization for whatever text was captured with the tags pattern return matches.stream().map(s -> { - return lowerTrail(upperInitial(normalizePunctuation(normalizeSpace(s, " ")))); + return lowerTrail(upperInitial(normalizePunctuation(s))); }).sorted().distinct().collect(toList()); } @@ -1178,7 +1181,7 @@ public class MediaBindingBean { // word list for exclude pattern String pattern = keys.stream().filter(Objects::nonNull).map(Objects::toString).map(s -> { - return PUNCTUATION_OR_SPACE.matcher(s).replaceAll(Matcher.quoteReplacement("\\P{Alnum}+")); + return normalizePunctuation(s, " ", "\\P{Alnum}+"); }).filter(s -> s.length() > 0).collect(joining("|", "\\b(", ")\\b")); return Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); diff --git a/source/net/filebot/similarity/Normalization.java b/source/net/filebot/similarity/Normalization.java index a5e5c191..0b9469eb 100644 --- a/source/net/filebot/similarity/Normalization.java +++ b/source/net/filebot/similarity/Normalization.java @@ -88,7 +88,7 @@ public class Normalization { } private static String normalize(CharSequence name, Pattern pattern, String replacement) { - return pattern.matcher(name).replaceAll(replacement).trim(); + return pattern.matcher(name).replaceAll(Matcher.quoteReplacement(replacement)).trim(); } private static String normalize(String name, Pattern[] pattern, String replacement) { diff --git a/source/net/filebot/util/StringUtilities.java b/source/net/filebot/util/StringUtilities.java index ee65d9ba..a596b3c2 100644 --- a/source/net/filebot/util/StringUtilities.java +++ b/source/net/filebot/util/StringUtilities.java @@ -55,6 +55,14 @@ public final class StringUtilities { return null; } + public static Stream tokenize(CharSequence s) { + return tokenize(s, SPACE); + } + + public static Stream tokenize(CharSequence s, Pattern pattern) { + return pattern.splitAsStream(s).filter(w -> w.length() > 0); + } + public static Stream streamMatches(CharSequence s, Pattern pattern) { return streamMatches(s, pattern, MatchResult::group); }