* clean release info before submitting queries

This commit is contained in:
Reinhard Pointner 2011-11-14 11:43:22 +00:00
parent d4a70f0fbc
commit 30993a5cb6
6 changed files with 157 additions and 54 deletions

View File

@ -2,28 +2,21 @@
package net.sourceforge.filebot.format;
import static java.util.Arrays.*;
import static java.util.ResourceBundle.*;
import static java.util.regex.Pattern.*;
import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.format.Define.*;
import static net.sourceforge.filebot.hash.VerificationUtilities.*;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
import net.sourceforge.filebot.hash.HashType;
import net.sourceforge.filebot.mediainfo.MediaInfo;
import net.sourceforge.filebot.mediainfo.ReleaseInfo;
import net.sourceforge.filebot.mediainfo.MediaInfo.StreamKind;
import net.sourceforge.filebot.web.CachedResource;
import net.sourceforge.filebot.web.Date;
import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.Movie;
@ -249,21 +242,10 @@ public class MediaBindingBean {
// Returns the video source tag found for the inferred media file, or null if none is found.
// NOTE(review): this span comes from a commit diff rendered without +/- markers, so both the
// pattern-based loop and the ReleaseInfo delegation appear here; presumably the loop is the
// removed implementation and the delegation is its replacement -- confirm against the repository.
public String getVideoSource() {
// use inferred media file
File inferredMediaFile = getInferredMediaFile();
// pattern matching any video source name (read from this class's resource bundle, case-insensitive)
Pattern source = compile(getBundle(getClass().getName()).getString("pattern.video.source"), CASE_INSENSITIVE);
ReleaseInfo releaseInfo = new ReleaseInfo();
// look for video source patterns in media file and its parent folder
String lastMatch = null;
for (File it : asList(inferredMediaFile.getParentFile(), inferredMediaFile)) {
// split the name on every non-alphanumeric character and test each part as a whole
for (String part : it.getName().split("[^\\p{Alnum}]")) {
if (source.matcher(part).matches()) {
// keep overwriting so that the last matching part wins
lastMatch = part;
}
}
}
return lastMatch;
return releaseInfo.getVideoSource(inferredMediaFile);
}
@ -271,19 +253,10 @@ public class MediaBindingBean {
// Returns the release group name found for the inferred media file, or null if none is found.
// NOTE(review): this span comes from a commit diff rendered without +/- markers, so both the
// matcher loop and the ReleaseInfo delegation appear here; presumably the loop is the removed
// implementation and the delegation is its replacement -- confirm against the repository.
public String getReleaseGroup() throws IOException {
// use inferred media file
File inferredMediaFile = getInferredMediaFile();
// pattern matching any release group name enclosed in separators
// (the lookbehind/lookahead reject matches that touch an alphanumeric character on either side)
Pattern groups = compile("(?<!\\p{Alnum})(" + releaseGroups.get() + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
ReleaseInfo releaseInfo = new ReleaseInfo();
// look for release group names in media file and its parent folder
String lastMatch = null;
for (File it : asList(inferredMediaFile.getParentFile(), inferredMediaFile)) {
// keep overwriting so that the last occurrence wins
for (Matcher matcher = groups.matcher(it.getName()); matcher.find();) {
lastMatch = matcher.group();
}
}
return lastMatch;
return releaseInfo.getReleaseGroup(inferredMediaFile);
}
@ -417,14 +390,4 @@ public class MediaBindingBean {
return hash;
}
// fetch release group names online and try to update the data once per day
// (the resource location is read from the "url.release-groups" entry of this class's resource bundle)
private final CachedResource<String> releaseGroups = new CachedResource<String>(getBundle(getClass().getName()).getString("url.release-groups"), 24 * 60 * 60 * 1000) {
@Override
public String process(ByteBuffer data) {
// decode the data as UTF-8 and replace every whitespace character with '|'
// so that the result can be used directly as a regex alternation
return compile("\\s").matcher(Charset.forName("UTF-8").decode(data)).replaceAll("|");
}
};
}

View File

@ -0,0 +1,106 @@
package net.sourceforge.filebot.mediainfo;
import static java.util.ResourceBundle.*;
import static java.util.regex.Pattern.*;
import static net.sourceforge.tuned.StringUtilities.*;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.filebot.web.CachedResource;
/**
 * Detects release-related tags (video source, codec, release group) in file and folder names
 * and removes them from strings such as search queries.
 */
public class ReleaseInfo {

	/**
	 * Looks for a video source tag in the name of the parent folder and in the file name.
	 *
	 * @param file media file to inspect
	 * @return the last match found (the file name is checked after the parent folder name),
	 *         or {@code null} if nothing matches
	 */
	public String getVideoSource(File file) {
		// check parent folder name and the file name itself for video source names
		return matchLast(getVideoSourcePattern(), parentFolderName(file), file.getName());
	}

	/**
	 * Looks for a release group name in the name of the parent folder and in the file name.
	 *
	 * @param file media file to inspect
	 * @return the last match found (the file name is checked after the parent folder name),
	 *         or {@code null} if nothing matches
	 * @throws IOException if the release group pattern cannot be built from the cached resource
	 */
	public String getReleaseGroup(File file) throws IOException {
		// check parent folder name and the file name itself for group names
		return matchLast(getReleaseGroupPattern(), parentFolderName(file), file.getName());
	}

	/**
	 * Returns the name of the folder that directly contains the given file, or {@code null} if the
	 * file has no parent. Only the folder name is used (not the whole parent path) so that
	 * directories further up the hierarchy cannot produce matches.
	 */
	private String parentFolderName(File file) {
		File parent = file.getParentFile();
		return parent == null ? null : parent.getName();
	}

	/**
	 * Applies the pattern to each sequence in order and remembers the most recent match.
	 *
	 * @param pattern pattern to search for
	 * @param sequence character sequences to search; {@code null} entries are skipped
	 * @return the last match over all sequences, or {@code null} if there is none
	 */
	protected String matchLast(Pattern pattern, CharSequence... sequence) {
		String lastMatch = null;
		for (CharSequence name : sequence) {
			if (name == null) {
				continue;
			}

			Matcher matcher = pattern.matcher(name);
			while (matcher.find()) {
				lastMatch = matcher.group();
			}
		}
		return lastMatch;
	}

	/**
	 * Removes video source and codec tags from the given strings.
	 *
	 * @param items strings to clean
	 * @return the cleaned strings, in iteration order
	 */
	public List<String> clean(Iterable<String> items) {
		return clean(items, getVideoSourcePattern(), getCodecPattern());
	}

	/**
	 * Removes release group, video source and codec tags from the given strings.
	 *
	 * @param items strings to clean
	 * @return the cleaned strings, in iteration order
	 * @throws IOException if the release group pattern cannot be built from the cached resource
	 */
	public List<String> cleanRG(Iterable<String> items) throws IOException {
		return clean(items, getReleaseGroupPattern(), getVideoSourcePattern(), getCodecPattern());
	}

	/**
	 * Removes every match of the blacklisted patterns from each string, then collapses each run of
	 * punctuation and whitespace into a single space and trims the result.
	 *
	 * @param items strings to clean
	 * @param blacklisted patterns whose matches are removed, applied in the given order
	 * @return the cleaned strings, in iteration order
	 */
	public List<String> clean(Iterable<String> items, Pattern... blacklisted) {
		List<String> cleaned = new ArrayList<String>();
		for (String string : items) {
			for (Pattern it : blacklisted) {
				string = it.matcher(string).replaceAll("");
			}
			cleaned.add(string.replaceAll("[\\p{Punct}\\p{Space}]+", " ").trim());
		}
		return cleaned;
	}

	/**
	 * @return case-insensitive pattern matching any codec name listed under {@code pattern.codec}
	 */
	public Pattern getCodecPattern() {
		// pattern matching any codec name
		String pattern = getBundle(getClass().getName()).getString("pattern.codec");
		return compileEnclosed(pattern);
	}

	/**
	 * @return case-insensitive pattern matching any video source name listed under
	 *         {@code pattern.video.source}
	 */
	public Pattern getVideoSourcePattern() {
		// pattern matching any video source name
		String pattern = getBundle(getClass().getName()).getString("pattern.video.source");
		return compileEnclosed(pattern);
	}

	/**
	 * @return case-insensitive pattern matching any of the release group names provided by
	 *         {@link #releaseGroupResource}
	 * @throws IOException if the release group names cannot be retrieved
	 */
	public Pattern getReleaseGroupPattern() throws IOException {
		// pattern matching any release group name enclosed in separators
		return compileEnclosed(join(releaseGroupResource.get(), "|"));
	}

	/**
	 * Compiles a case-insensitive pattern that matches any of the given alternatives, but only where
	 * the match is not directly preceded or followed by an alphanumeric character.
	 */
	private Pattern compileEnclosed(String alternatives) {
		return compile("(?<!\\p{Alnum})(" + alternatives + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
	}

	// fetch release group names online and try to update the data once per day
	protected final CachedResource<String[]> releaseGroupResource = new CachedResource<String[]>(getBundle(getClass().getName()).getString("url.release-groups"), 24 * 60 * 60 * 1000) {

		@Override
		public String[] process(ByteBuffer data) {
			// Trim and split on runs of whitespace: splitting on single whitespace characters
			// yields empty names for CRLF line endings or blank lines, and an empty name becomes
			// an empty regex alternative that matches the empty string.
			String names = Charset.forName("UTF-8").decode(data).toString().trim();
			return compile("\\s+").split(names);
		}
	};

}

View File

@ -1,5 +1,8 @@
# source names mostly copied from [http://en.wikipedia.org/wiki/Pirated_movie_release_types]
pattern.video.source: CAMRip|CAM|TS|TELESYNC|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R5|R5LINE|DVDRip|DVDR|TVRip|DSR|PDTV|HDTV|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR
# additional release info patterns
pattern.codec: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|2ch|6ch|720p|1080p
# group names mostly copied from [http://scenelingo.wordpress.com/list-of-scene-release-groups]
url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt
url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt

View File

@ -13,11 +13,11 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -34,6 +34,7 @@ import javax.swing.Action;
import javax.swing.SwingUtilities;
import net.sourceforge.filebot.Analytics;
import net.sourceforge.filebot.mediainfo.ReleaseInfo;
import net.sourceforge.filebot.similarity.Match;
import net.sourceforge.filebot.ui.SelectDialog;
import net.sourceforge.filebot.web.Movie;
@ -160,11 +161,6 @@ class MovieHashMatcher implements AutoCompleteMatcher {
}
// Derives a movie name from the given file: every run of punctuation characters in the
// name returned by getName is replaced by a single space and the result is trimmed.
private String normalizeMovieName(File movie) {
	String name = getName(movie);
	String withoutPunctuation = name.replaceAll("\\p{Punct}+", " ");
	return withoutPunctuation.trim();
}
protected Movie grabMovieName(File movieFile, Locale locale, boolean autodetect, Movie... suggestions) throws Exception {
List<Movie> options = new ArrayList<Movie>();
@ -185,9 +181,12 @@ class MovieHashMatcher implements AutoCompleteMatcher {
}
// search by file name or folder name
Set<String> searchQueries = new LinkedHashSet<String>(2);
searchQueries.add(normalizeMovieName(movieFile));
searchQueries.add(normalizeMovieName(movieFile.getParentFile()));
Collection<String> searchQueries = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
searchQueries.add(getName(movieFile));
searchQueries.add(getName(movieFile.getParentFile()));
// remove blacklisted terms
searchQueries = new ReleaseInfo().cleanRG(searchQueries);
for (String query : searchQueries) {
if (autodetect && options.isEmpty()) {
@ -197,7 +196,7 @@ class MovieHashMatcher implements AutoCompleteMatcher {
// allow manual user input
if (options.isEmpty() || !autodetect) {
String suggestion = options.isEmpty() ? normalizeMovieName(movieFile) : options.get(0).getName();
String suggestion = options.isEmpty() ? searchQueries.iterator().next() : options.get(0).getName();
String input = showInputDialog(null, "Enter movie name:", suggestion);
if (input != null) {

View File

@ -8,6 +8,7 @@ import org.junit.runners.Suite.SuiteClasses;
import net.sourceforge.filebot.format.ExpressionFormatTest;
import net.sourceforge.filebot.hash.VerificationFormatTest;
import net.sourceforge.filebot.mediainfo.ReleaseInfoTest;
import net.sourceforge.filebot.similarity.SimilarityTestSuite;
import net.sourceforge.filebot.subtitle.SubtitleReaderTestSuite;
import net.sourceforge.filebot.ui.rename.MatchModelTest;
@ -16,7 +17,7 @@ import net.sourceforge.filebot.web.WebTestSuite;
// JUnit suite that aggregates the test classes and sub-suites listed in @SuiteClasses.
// NOTE(review): two @SuiteClasses lines appear because this span comes from a commit diff rendered
// without +/- markers; presumably the second one (which adds ReleaseInfoTest) replaces the first -- confirm.
@RunWith(Suite.class)
@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, MatchSimilarityMetricTest.class, SubtitleReaderTestSuite.class })
@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, MatchSimilarityMetricTest.class, SubtitleReaderTestSuite.class, ReleaseInfoTest.class })
public class FileBotTestSuite {
}

View File

@ -0,0 +1,31 @@
package net.sourceforge.filebot.mediainfo;
import static org.junit.Assert.*;
import java.io.File;
import org.junit.Test;
/**
 * Tests for the tag detection offered by {@link ReleaseInfo}.
 */
public class ReleaseInfoTest {

	/** The video source tag is returned exactly as it is spelled in the file name. */
	@Test
	public void getVideoSource() {
		File mediaFile = new File("Jurassic.Park[1993]DvDrip-aXXo.avi");
		String videoSource = new ReleaseInfo().getVideoSource(mediaFile);

		assertEquals("DvDrip", videoSource);
	}

	/** The release group name is returned exactly as it is spelled in the file name. */
	@Test
	public void getReleaseGroup() throws Exception {
		File mediaFile = new File("Jurassic.Park[1993]DvDrip-aXXo.avi");
		String releaseGroup = new ReleaseInfo().getReleaseGroup(mediaFile);

		assertEquals("aXXo", releaseGroup);
	}

}