From 70a9fc7d0c8f7587a51d435922de69fa862fdc99 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Fri, 7 Oct 2016 21:19:49 +0800 Subject: [PATCH] Refactor SeasonEpisodeMatcher --- .../similarity/SeasonEpisodeMatcher.java | 218 ++++++++---------- 1 file changed, 99 insertions(+), 119 deletions(-) diff --git a/source/net/filebot/similarity/SeasonEpisodeMatcher.java b/source/net/filebot/similarity/SeasonEpisodeMatcher.java index 61a2c9fa..7b922467 100644 --- a/source/net/filebot/similarity/SeasonEpisodeMatcher.java +++ b/source/net/filebot/similarity/SeasonEpisodeMatcher.java @@ -1,7 +1,9 @@ package net.filebot.similarity; +import static java.util.Arrays.*; import static java.util.Collections.*; import static java.util.regex.Pattern.*; +import static java.util.stream.Collectors.*; import static net.filebot.util.FileUtilities.*; import static net.filebot.util.RegularExpressions.*; import static net.filebot.util.StringUtilities.*; @@ -9,13 +11,16 @@ import static net.filebot.util.StringUtilities.*; import java.io.File; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; +import java.util.IntSummaryStatistics; import java.util.LinkedHashSet; import java.util.List; +import java.util.Objects; import java.util.Set; +import java.util.function.Function; import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.IntStream; public class SeasonEpisodeMatcher { @@ -31,133 +36,54 @@ public class SeasonEpisodeMatcher { SeasonEpisodePattern Season_00_Episode_00, S00E00SEQ, S00E00, SxE1_SxE2, SxE, Dot101, EP0, Num101_TOKEN, E1of2, Num101_SUBSTRING; // match patterns like Season 01 Episode 02, ... - Season_00_Episode_00 = new SeasonEpisodePattern(null, "(? { + return range(m.group(1), m.group(2)); + }); // match patterns like S01E01-E05 - S00E00SEQ = new SeasonEpisodePattern(null, "(? process(MatchResult match) { - List seq = new ArrayList(); - int s = Integer.parseInt(match.group(1)); - int e1 = Integer.parseInt(match.group(2)); - int e2 = Integer.parseInt(match.group(3)); - for (int i = e1; i <= e2; i++) { - seq.add(new SxE(s, i)); - } - return seq; - } - }; + S00E00SEQ = new SeasonEpisodePattern(null, "(? { + return range(m.group(1), m.group(2), m.group(3)); + }); // match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ... - S00E00 = new SeasonEpisodePattern(null, "(? process(MatchResult match) { - List matches = new ArrayList(2); - int seasonNumber = Integer.parseInt(match.group(1)); - for (int episodeNumber : matchIntegers(match.group(2))) { - matches.add(new SxE(seasonNumber, episodeNumber)); - } - return matches; - } - }; + S00E00 = new SeasonEpisodePattern(null, "(? { + return multi(m.group(1), m.group(2)); + }); // match patterns 1x01-1x02, ... - SxE1_SxE2 = new SeasonEpisodePattern(sanity, "(? process(MatchResult match) { - List matches = new ArrayList(2); - String[] numbers = NON_DIGIT.split(match.group(0)); - for (int i = 0; i < numbers.length; i += 2) { - matches.add(new SxE(numbers[i], numbers[i + 1])); // SxE-SxE-SxE - } - return matches; - } - }; + SxE1_SxE2 = new SeasonEpisodePattern(sanity, "(? { + return pairs(m.group()); + }); // match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ... 1x01-02-03-04, 1x01x02x03x04 ... - SxE = new SeasonEpisodePattern(sanity, "(? process(MatchResult match) { - List matches = new ArrayList(2); - int seasonNumber = Integer.parseInt(match.group(1)); - for (int episodeNumber : matchIntegers(match.group(2))) { - matches.add(new SxE(seasonNumber, episodeNumber)); - } - return matches; - } - }; + SxE = new SeasonEpisodePattern(sanity, "(? { + return multi(m.group(1), m.group(2)); + }); // match patterns 1.02, ..., 10.02, ... - Dot101 = new SeasonEpisodePattern(sanity, "(? process(MatchResult match) { - List matches = new ArrayList(2); - int seasonNumber = Integer.parseInt(match.group(1)); - for (int episodeNumber : matchIntegers(match.group(2))) { - matches.add(new SxE(seasonNumber, episodeNumber)); - } - return matches; - } - }; + Dot101 = new SeasonEpisodePattern(sanity, "(? { + return multi(m.group(1), m.group(2)); + }); // match patterns like ep1, ep.1, ... - EP0 = new SeasonEpisodePattern(sanity, "(? process(MatchResult match) { - // regex doesn't match season - return singleton(new SxE(match.group(1), match.group(2))); - } - }; + EP0 = new SeasonEpisodePattern(sanity, "(? { + return single(m.group(1), m.group(2)); + }); // match patterns like 01, 102, 1003, 10102 (enclosed in separators) - Num101_TOKEN = new SeasonEpisodePattern(sanity, "(? { + return numbers(m.group(1), IntStream.rangeClosed(2, m.groupCount()).mapToObj(m::group).filter(Objects::nonNull).toArray(String[]::new)); + }); - @Override - protected Collection process(MatchResult match) { - Set sxe = new LinkedHashSet(2); - - // interpret match as season and episode, but ignore 001 => 0x01 Season 0 matches - if (match.group(1).length() > 0 && Integer.parseInt(match.group(1)) > 0) { - for (int i = 2; i <= match.groupCount(); i++) { - if (match.group(i) != null) { - sxe.add(new SxE(match.group(1), match.group(i))); - } - } - } - - // interpret match both ways, as SxE match as well as episode number only match if it's not an double episode - if (sxe.size() < 2) { - sxe.add(new SxE(null, match.group(1) + match.group(2))); - } - - // return both matches, unless they are one and the same - return sxe; - } - }; - - E1of2 = new SeasonEpisodePattern(sanity, "(? process(MatchResult match) { - // regex doesn't match season - return singleton(new SxE(null, match.group(1))); - } - }; + // match patterns like "1 of 2" as Episode 1 + E1of2 = new SeasonEpisodePattern(sanity, "(? { + return single(null, m.group(1)); + }); // (last-resort) match patterns like 101, 102 (and greedily just grab the first) - Num101_SUBSTRING = new SeasonEpisodePattern(STRICT_SANITY, "(? process(MatchResult match) { - return singleton(new SxE(match.group(1), match.group(2))); - } - }; + Num101_SUBSTRING = new SeasonEpisodePattern(STRICT_SANITY, "(? { + return single(m.group(1), m.group(2)); + }); // only use S00E00 and SxE pattern in strict mode if (strict) { @@ -170,6 +96,57 @@ public class SeasonEpisodeMatcher { seasonPattern = compile("Season[-._ ]?(\\d{1,2})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS); } + protected List single(String season, String episode) { + return singletonList(new SxE(season, episode)); + } + + protected List multi(String season, String... episodes) { + int s = Integer.parseInt(season); + return stream(episodes).flatMap(e -> matchIntegers(e).stream()).map(e -> new SxE(s, e)).collect(toList()); + } + + protected List range(String season, String... episodes) { + IntSummaryStatistics stats = stream(episodes).flatMap(s -> matchIntegers(s).stream()).mapToInt(i -> i).summaryStatistics(); + + int s = Integer.parseInt(season); + return IntStream.rangeClosed(stats.getMin(), stats.getMax()).mapToObj(e -> new SxE(s, e)).collect(toList()); + } + + protected List pairs(String text) { + List matches = new ArrayList(2); + + // SxE-SxE-SxE + String[] numbers = NON_DIGIT.split(text); + for (int i = 0; i < numbers.length; i += 2) { + matches.add(new SxE(numbers[i], numbers[i + 1])); + } + + return matches; + } + + protected List numbers(String head, String... tail) { + List matches = new ArrayList(2); + + // interpret match as season and episode, but ignore 001 => 0x01 Season 0 matches + for (String t : tail) { + SxE sxe = new SxE(head, t); + if (sxe.season > 0) { + matches.add(sxe); + } + } + + // interpret match both ways, as SxE match as well as episode number only match if it's not an double episode + if (tail.length == 1) { + SxE absolute = new SxE(null, head + tail[0]); + if (!matches.contains(absolute)) { + matches.add(absolute); + } + } + + // return both matches, unless they are one and the same + return matches; + } + /** * Try to get season and episode numbers for the given string. * @@ -319,11 +296,18 @@ public class SeasonEpisodeMatcher { public static class SeasonEpisodePattern implements SeasonEpisodeParser { - protected final Pattern pattern; - protected final SeasonEpisodeFilter sanity; + protected Pattern pattern; + protected Function> process; + + protected SeasonEpisodeFilter sanity; public SeasonEpisodePattern(SeasonEpisodeFilter sanity, String pattern) { + this(sanity, pattern, m -> singletonList(new SxE(m.group(1), m.group(2)))); + } + + public SeasonEpisodePattern(SeasonEpisodeFilter sanity, String pattern, Function> process) { this.pattern = Pattern.compile(pattern); + this.process = process; this.sanity = sanity; } @@ -331,10 +315,6 @@ public class SeasonEpisodeMatcher { return pattern.matcher(name); } - protected Collection process(MatchResult match) { - return singleton(new SxE(match.group(1), match.group(2))); - } - @Override public List match(CharSequence name) { // name will probably contain no more than two matches @@ -343,7 +323,7 @@ public class SeasonEpisodeMatcher { Matcher matcher = matcher(name); while (matcher.find()) { - for (SxE value : process(matcher)) { + for (SxE value : process.apply(matcher)) { if (sanity == null || sanity.filter(value)) { matches.add(value); } @@ -358,7 +338,7 @@ public class SeasonEpisodeMatcher { Matcher matcher = matcher(name).region(fromIndex, name.length()); while (matcher.find()) { - for (SxE value : process(matcher)) { + for (SxE value : process.apply(matcher)) { if (sanity == null || sanity.filter(value)) { return matcher.start(); }