Refactor Normalization

This commit is contained in:
Reinhard Pointner 2016-11-09 23:02:25 +08:00
parent 715f12601a
commit 5bf208fadb
3 changed files with 22 additions and 11 deletions

View File

@ -34,7 +34,6 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.IntStream; import java.util.stream.IntStream;
import java.util.stream.Stream; import java.util.stream.Stream;
@ -308,7 +307,7 @@ public class MediaBindingBean {
String codec = getMediaInfo(StreamKind.Video, 0, "Encoded_Library_Name", "Encoded_Library/Name", "CodecID/Hint", "Format"); String codec = getMediaInfo(StreamKind.Video, 0, "Encoded_Library_Name", "Encoded_Library/Name", "CodecID/Hint", "Format");
// get first token (e.g. DivX 5 => DivX) // get first token (e.g. DivX 5 => DivX)
return SPACE.splitAsStream(codec).findFirst().get(); return tokenize(codec).findFirst().get();
} }
@Define("ac") @Define("ac")
@ -317,7 +316,7 @@ public class MediaBindingBean {
String codec = getMediaInfo(StreamKind.Audio, 0, "CodecID/Hint", "Format"); String codec = getMediaInfo(StreamKind.Audio, 0, "CodecID/Hint", "Format");
// remove punctuation (e.g. AC-3 => AC3) // remove punctuation (e.g. AC-3 => AC3)
return PUNCTUATION_OR_SPACE.matcher(codec).replaceAll(""); return normalizePunctuation(codec, "", "");
} }
@Define("cf") @Define("cf")
@ -326,7 +325,7 @@ public class MediaBindingBean {
String extensions = getMediaInfo(StreamKind.General, 0, "Codec/Extensions", "Format"); String extensions = getMediaInfo(StreamKind.General, 0, "Codec/Extensions", "Format");
// get first extension // get first extension
return SPACE.splitAsStream(extensions).findFirst().get().toLowerCase(); return tokenize(extensions).map(String::toLowerCase).findFirst().get();
} }
@Define("vf") @Define("vf")
@ -334,19 +333,23 @@ public class MediaBindingBean {
int width = Integer.parseInt(getMediaInfo(StreamKind.Video, 0, "Width")); int width = Integer.parseInt(getMediaInfo(StreamKind.Video, 0, "Width"));
int height = Integer.parseInt(getMediaInfo(StreamKind.Video, 0, "Height")); int height = Integer.parseInt(getMediaInfo(StreamKind.Video, 0, "Height"));
int ns = 0;
int[] ws = new int[] { 15360, 7680, 3840, 1920, 1280, 1024, 854, 852, 720, 688, 512, 320 }; int[] ws = new int[] { 15360, 7680, 3840, 1920, 1280, 1024, 854, 852, 720, 688, 512, 320 };
int[] hs = new int[] { 8640, 4320, 2160, 1080, 720, 576, 576, 480, 480, 360, 240, 240 }; int[] hs = new int[] { 8640, 4320, 2160, 1080, 720, 576, 576, 480, 480, 360, 240, 240 };
int ns = 0;
for (int i = 0; i < ws.length - 1; i++) { for (int i = 0; i < ws.length - 1; i++) {
if ((width >= ws[i] || height >= hs[i]) || (width > ws[i + 1] && height > hs[i + 1])) { if ((width >= ws[i] || height >= hs[i]) || (width > ws[i + 1] && height > hs[i + 1])) {
ns = hs[i]; ns = hs[i];
break; break;
} }
} }
if (ns > 0) { if (ns > 0) {
// e.g. 720p, nobody actually wants files to be tagged as interlaced, e.g. 720i // e.g. 720p, nobody actually wants files to be tagged as interlaced, e.g. 720i
return String.format("%dp", ns); return String.format("%dp", ns);
} }
return null; // video too small return null; // video too small
} }
@ -364,7 +367,7 @@ public class MediaBindingBean {
String channels = getMediaInfo(StreamKind.Audio, 0, "Channel(s)_Original", "Channel(s)"); String channels = getMediaInfo(StreamKind.Audio, 0, "Channel(s)_Original", "Channel(s)");
// get first number, e.g. 6ch // get first number, e.g. 6ch
return SPACE.splitAsStream(channels).filter(DIGIT.asPredicate()).findFirst().get() + "ch"; return String.format("%dch", matchInteger(channels));
} }
@Define("channels") @Define("channels")
@ -372,9 +375,9 @@ public class MediaBindingBean {
String channels = getMediaInfo(StreamKind.Audio, 0, "ChannelPositions/String2", "Channel(s)_Original", "Channel(s)"); String channels = getMediaInfo(StreamKind.Audio, 0, "ChannelPositions/String2", "Channel(s)_Original", "Channel(s)");
// e.g. ChannelPositions/String2: 3/2/2.1 / 3/2/0.1 (one audio stream may contain multiple multi-channel streams) // e.g. ChannelPositions/String2: 3/2/2.1 / 3/2/0.1 (one audio stream may contain multiple multi-channel streams)
double d = SPACE.splitAsStream(channels).mapToDouble(s -> { double d = tokenize(channels).mapToDouble(s -> {
try { try {
return SLASH.splitAsStream(s).mapToDouble(Double::parseDouble).reduce(0, (a, b) -> a + b); return tokenize(s, SLASH).mapToDouble(Double::parseDouble).reduce(0, (a, b) -> a + b);
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
return 0; return 0;
} }
@ -492,7 +495,7 @@ public class MediaBindingBean {
// heavy normalization for whatever text was captured with the tags pattern // heavy normalization for whatever text was captured with the tags pattern
return matches.stream().map(s -> { return matches.stream().map(s -> {
return lowerTrail(upperInitial(normalizePunctuation(normalizeSpace(s, " ")))); return lowerTrail(upperInitial(normalizePunctuation(s)));
}).sorted().distinct().collect(toList()); }).sorted().distinct().collect(toList());
} }
@ -1178,7 +1181,7 @@ public class MediaBindingBean {
// word list for exclude pattern // word list for exclude pattern
String pattern = keys.stream().filter(Objects::nonNull).map(Objects::toString).map(s -> { String pattern = keys.stream().filter(Objects::nonNull).map(Objects::toString).map(s -> {
return PUNCTUATION_OR_SPACE.matcher(s).replaceAll(Matcher.quoteReplacement("\\P{Alnum}+")); return normalizePunctuation(s, " ", "\\P{Alnum}+");
}).filter(s -> s.length() > 0).collect(joining("|", "\\b(", ")\\b")); }).filter(s -> s.length() > 0).collect(joining("|", "\\b(", ")\\b"));
return Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS); return Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);

View File

@ -88,7 +88,7 @@ public class Normalization {
} }
private static String normalize(CharSequence name, Pattern pattern, String replacement) { private static String normalize(CharSequence name, Pattern pattern, String replacement) {
return pattern.matcher(name).replaceAll(replacement).trim(); return pattern.matcher(name).replaceAll(Matcher.quoteReplacement(replacement)).trim();
} }
private static String normalize(String name, Pattern[] pattern, String replacement) { private static String normalize(String name, Pattern[] pattern, String replacement) {

View File

@ -55,6 +55,14 @@ public final class StringUtilities {
return null; return null;
} }
public static Stream<String> tokenize(CharSequence s) {
return tokenize(s, SPACE);
}
public static Stream<String> tokenize(CharSequence s, Pattern pattern) {
return pattern.splitAsStream(s).filter(w -> w.length() > 0);
}
public static Stream<String> streamMatches(CharSequence s, Pattern pattern) { public static Stream<String> streamMatches(CharSequence s, Pattern pattern) {
return streamMatches(s, pattern, MatchResult::group); return streamMatches(s, pattern, MatchResult::group);
} }