Refactor ClutterBracketPattern

This commit is contained in:
Reinhard Pointner 2016-03-29 11:53:21 +00:00
parent 32c40157a4
commit 91ed090da1
2 changed files with 28 additions and 16 deletions

View File

@ -36,6 +36,7 @@ import java.util.function.Function;
import java.util.function.IntFunction; import java.util.function.IntFunction;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.IntStream;
import org.tukaani.xz.XZInputStream; import org.tukaani.xz.XZInputStream;
@ -181,9 +182,10 @@ public class ReleaseInfo {
} }
return items.stream().map(it -> { return items.stream().map(it -> {
it = strict ? clean(it, stopwords[b]) : substringBefore(it, stopwords[b]); String head = strict ? clean(it, stopwords[b]) : substringBefore(it, stopwords[b]);
it = normalizePunctuation(clean(it, blacklist[b])); String norm = normalizePunctuation(clean(head, blacklist[b]));
return it; // debug.finest(format("CLEAN: %s => %s => %s", it, head, norm));
return norm;
}).filter(s -> s.length() > 0).collect(toList()); }).filter(s -> s.length() > 0).collect(toList());
} }
@ -310,8 +312,15 @@ public class ReleaseInfo {
public Pattern getClutterBracketPattern(boolean strict) { public Pattern getClutterBracketPattern(boolean strict) {
// match patterns like [Action, Drama] or {ENG-XViD-MP3-DVDRiP} etc // match patterns like [Action, Drama] or {ENG-XViD-MP3-DVDRiP} etc
String contentFilter = strict ? "[\\p{Space}\\p{Punct}&&[^\\[\\]]]" : "\\p{Alpha}"; String brackets = "()[]{}";
return compile("(?:\\[([^\\[\\]]+?" + contentFilter + "[^\\[\\]]+?)\\])|(?:\\{([^\\{\\}]+?" + contentFilter + "[^\\{\\}]+?)\\})|(?:\\(([^\\(\\)]+?" + contentFilter + "[^\\(\\)]+?)\\))"); String contains = strict ? "[[^a-z0-9]&&[^" + quote(brackets) + "]]" : "\\p{Alpha}";
return IntStream.range(0, brackets.length() / 2).map(i -> i * 2).mapToObj(i -> {
String open = quote(brackets.substring(i, i + 1));
String close = quote(brackets.substring(i + 1, i + 2));
String notOpenClose = "[^" + open + close + "]+?";
return open + "(" + notOpenClose + contains + notOpenClose + ")" + close;
}).collect(collectingAndThen(joining("|"), pattern -> compile(pattern, CASE_INSENSITIVE)));
} }
public Pattern getReleaseGroupPattern(boolean strict) throws Exception { public Pattern getReleaseGroupPattern(boolean strict) throws Exception {

View File

@ -1,31 +1,34 @@
package net.filebot.media; package net.filebot.media;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import java.io.File; import java.util.regex.Pattern;
import org.junit.Test; import org.junit.Test;
public class ReleaseInfoTest { public class ReleaseInfoTest {
ReleaseInfo info = new ReleaseInfo();
@Test @Test
public void getVideoSource() { public void getVideoSource() {
ReleaseInfo info = new ReleaseInfo(); assertEquals("DVDRip", info.getVideoSource("Jurassic.Park[1993]DvDrip-aXXo"));
File f = new File("Jurassic.Park[1993]DvDrip-aXXo.avi");
assertEquals("DVDRip", info.getVideoSource(f.getName()));
} }
@Test @Test
public void getReleaseGroup() throws Exception { public void getReleaseGroup() throws Exception {
ReleaseInfo info = new ReleaseInfo(); assertEquals("aXXo", info.getReleaseGroup("Jurassic.Park[1993]DvDrip-aXXo"));
File f = new File("Jurassic.Park[1993]DvDrip-aXXo.avi"); }
assertEquals("aXXo", info.getReleaseGroup(f.getName())); @Test
public void getClutterBracketPattern() throws Exception {
assertEquals("John [2016] (ENG)", clean(info.getClutterBracketPattern(true), "John [2016] [Action, Drama] (ENG)"));
assertEquals("John [2016] ", clean(info.getClutterBracketPattern(false), "John [2016] [Action, Drama] (ENG)"));
}
private static String clean(Pattern p, String s) {
return p.matcher(s).replaceAll("");
} }
} }