+ try to auto-detect name from imdb/thetvdb ID if possible

2011-12-12 14:06:26 +00:00 · 2011-12-12 14:06:26 +00:00 · c1ed273158
parent c37c38c2c7
commit c1ed273158
4 changed files with 133 additions and 60 deletions
--- a/source/net/sourceforge/filebot/cli/CmdlineOperations.java
+++ b/source/net/sourceforge/filebot/cli/CmdlineOperations.java
@ -95,12 +95,12 @@ public class CmdlineOperations implements CmdlineInterface {
 		int cws = 0; // common word sequence
 		double max = mediaFiles.size();
 		SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
 		Collection<String> cwsList = emptySet();
 		if (max >= 5) {
-			cwsList = detectSeriesNames(mediaFiles);
+			cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
 		}
 		SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
 		for (File f : mediaFiles) {
 			// count SxE matches
 			if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) {
@ -306,7 +306,7 @@ public class CmdlineOperations implements CmdlineInterface {
 		for (File subtitleFile : subtitleFiles) {
 			// check if subtitle corresponds to a movie file (same name, different extension)
 			for (int i = 0; i < movieDescriptors.length; i++) {
-				if (movieDescriptors != null) {
+				if (movieDescriptors[i] != null) {
 					if (isDerived(subtitleFile, movieFiles[i])) {
 						File movieDestination = renameMap.get(movieFiles[i]);
 						File subtitleDestination = new File(movieDestination.getParentFile(), getName(movieDestination) + "." + getExtension(subtitleFile));
@ -568,21 +568,9 @@ public class CmdlineOperations implements CmdlineInterface {
 	}
-	private Collection<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception {
+	private List<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception {
 		Collection<String> names = new LinkedHashSet<String>();
 		// detect by imdb id from nfo file in the same folder
 		for (List<File> file : mapByFolder(mediaFiles).values()) {
 			for (int imdbid : grepImdbIdFor(file.get(0))) {
 				Movie movie = WebServices.TMDb.getMovieDescriptor(imdbid, Locale.ENGLISH);
 				if (movie != null) {
 					names.add(movie.getName());
 				}
 			}
 		}
 		// detect series name by common word sequence
-		names.addAll(detectSeriesNames(mediaFiles));
+		List<String> names = detectSeriesNames(mediaFiles);
 		if (names.isEmpty() || (strict && names.size() > 1)) {
 			throw new Exception("Unable to auto-select query: " + names);
--- a/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java
+++ b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java
@ -5,55 +5,125 @@ package net.sourceforge.filebot.mediainfo;
 import static java.util.ResourceBundle.*;
 import static java.util.concurrent.TimeUnit.*;
 import static java.util.regex.Pattern.*;
 import static net.sourceforge.tuned.FileUtilities.*;
 import static net.sourceforge.tuned.StringUtilities.*;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Scanner;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import net.sourceforge.filebot.MediaTypes;
 import net.sourceforge.filebot.WebServices;
 import net.sourceforge.filebot.similarity.SeriesNameMatcher;
 import net.sourceforge.filebot.web.CachedResource;
 import net.sourceforge.filebot.web.Movie;
 import net.sourceforge.filebot.web.SearchResult;
 import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
 public class ReleaseInfo {
-	public static Collection<String> detectSeriesNames(Collection<File> files) throws IOException {
+	public static List<String> detectSeriesNames(Collection<File> files) throws Exception {
-		SeriesNameMatcher matcher = new SeriesNameMatcher();
+		ReleaseInfo releaseInfo = new ReleaseInfo();
-		ReleaseInfo cleaner = new ReleaseInfo();
+		
 		// don't allow duplicates
 		Map<String, String> names = new LinkedHashMap<String, String>();
 		for (SearchResult it : releaseInfo.lookupNameByInfoFile(files, Locale.ENGLISH)) {
 			names.put(it.getName().toLowerCase(), it.getName());
 		}
 		// match common word sequence and clean detected word sequence from unwanted elements
-		Collection<String> names = matcher.matchAll(files.toArray(new File[files.size()]));
+		Collection<String> matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
-		return new LinkedHashSet<String>(cleaner.cleanRG(names));
+		for (String it : releaseInfo.cleanRG(matches)) {
 			names.put(it.toLowerCase(), it);
 		}
 		return new ArrayList<String>(names.values());
 	}
-	public static Set<Integer> grepImdbIdFor(File movieFile) throws IOException {
+	public static Set<Integer> grepImdbIdFor(File file) throws Exception {
 		ReleaseInfo releaseInfo = new ReleaseInfo();
 		Set<Integer> collection = new LinkedHashSet<Integer>();
 		File movieFolder = movieFile.getParentFile(); // lookup imdb id from nfo files in this folder
-		for (File file : movieFolder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
+		for (File nfo : file.getParentFile().listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
-			Scanner scanner = new Scanner(new FileInputStream(file), "UTF-8");
+			String text = new String(readFile(nfo), "UTF-8");
 			collection.addAll(releaseInfo.grepImdbId(text));
 		}
-			try {
+		return collection;
-				// scan for imdb id patterns like tt1234567
+	}
 				String imdb = null;
-				while ((imdb = scanner.findWithinHorizon("(?<=tt)\\d{7}", 64 * 1024)) != null) {
+	
-					collection.add(Integer.parseInt(imdb));
+	public Set<SearchResult> lookupNameByInfoFile(Collection<File> files, Locale language) throws Exception {
 		Set<SearchResult> names = new LinkedHashSet<SearchResult>();
 		// search for id in sibling nfo files
 		for (File folder : mapByFolder(files).keySet()) {
 			for (File nfo : folder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
 				String text = new String(readFile(nfo), "UTF-8");
 				for (int imdbid : grepImdbId(text)) {
 					Movie movie = WebServices.OpenSubtitles.getMovieDescriptor(imdbid, language); // movies and tv shows
 					if (movie != null) {
 						names.add(movie);
 					}
 				}
-			} finally {
+				
-				scanner.close();
+				for (int tvdbid : grepTheTvdbId(text)) {
 					TheTVDBSearchResult series = WebServices.TheTVDB.lookup(tvdbid, language); // just tv shows
 					if (series != null) {
 						names.add(series);
 					}
 				}
 			}
 		}
 		return names;
 	}
 	/**
 	 * Scans the given text for IMDb title IDs such as {@code tt1234567}.
 	 * 
 	 * An ID is recognized as a run of 7 or 8 digits that directly follows the
 	 * letters {@code tt}. The {@code tt} prefix is not part of the result, and
 	 * because the digits are parsed as an integer, leading zeros are dropped
 	 * (e.g. {@code tt0123456} yields {@code 123456}).
 	 * 
 	 * @param text text to scan, e.g. the contents of an nfo file; must not be null
 	 * @return the distinct numeric IDs in order of first occurrence; empty if none were found
 	 */
 	public Set<Integer> grepImdbId(CharSequence text) {
 		// scan for imdb id patterns like tt1234567 or tt12345678
 		// (matching exactly 7 digits would cut an 8-digit ID short and yield a wrong ID;
 		// the quantifier is greedy, so 8 digits are consumed whenever they are present)
 		Matcher imdbMatch = Pattern.compile("(?<=tt)\\d{7,8}").matcher(text);
 		Set<Integer> collection = new LinkedHashSet<Integer>();
 		while (imdbMatch.find()) {
 			// at most 8 digits, so the value always fits into an int
 			collection.add(Integer.parseInt(imdbMatch.group()));
 		}
 		return collection;
 	}
 	public Set<Integer> grepTheTvdbId(CharSequence text) {
 		// scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14
 		Set<Integer> collection = new LinkedHashSet<Integer>();
 		for (String token : Pattern.compile("[\\s\"<>|]+").split(text)) {
 			try {
 				URL url = new URL(token);
 				if (url.getHost().contains("thetvdb")) {
 					Matcher idMatch = Pattern.compile("(?<=(^|\\W)id=)\\d+").matcher(url.getQuery());
 					while (idMatch.find()) {
 						collection.add(Integer.parseInt(idMatch.group()));
 					}
 				}
 			} catch (MalformedURLException e) {
 				// parse for thetvdb urls, ignore everything else
 			}
 		}
--- a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
+++ b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
@ -200,8 +200,8 @@ public class SeriesNameMatcher {
 	protected String normalize(String name) {
 		// remove group names and checksums, any [...] or (...)
-		name = name.replaceAll("\\([^\\(]*\\)", "");
+		name = name.replaceAll("\\([^\\(]*\\)", " ");
-		name = name.replaceAll("\\[[^\\[]*\\]", "");
+		name = name.replaceAll("\\[[^\\[]*\\]", " ");
 		// remove/normalize special characters
 		name = name.replaceAll("['`´]+", "");
--- a/source/net/sourceforge/filebot/web/TheTVDBClient.java
+++ b/source/net/sourceforge/filebot/web/TheTVDBClient.java
@ -184,6 +184,21 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
 	}
 	/**
 	 * Looks up a series by its TheTVDB series id.
 	 * 
 	 * Fetches the {@code /series/<id>/all/<language>.xml} record and reads the
 	 * series name from its {@code SeriesName} element.
 	 * 
 	 * @param id TheTVDB series id
 	 * @param language language whose two-letter code selects the record to fetch
 	 * @return a search result holding the series name and the given id, or
 	 *         {@code null} if the record could not be found (presumably because
 	 *         the id does not exist)
 	 * @throws Exception if the record cannot be retrieved or parsed for any
 	 *         reason other than the resource not being found
 	 */
 	public TheTVDBSearchResult lookup(int id, Locale language) throws Exception {
 		try {
 			URL baseRecordLocation = getResource(MirrorType.XML, "/api/" + apikey + "/series/" + id + "/all/" + language.getLanguage() + ".xml");
 			
 			// close the stream explicitly; the parser is not guaranteed to release it, especially when parsing fails
 			java.io.InputStream in = baseRecordLocation.openStream();
 			Document baseRecord;
 			try {
 				baseRecord = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
 			} finally {
 				in.close();
 			}
 			
 			String name = selectString("//SeriesName", baseRecord);
 			return new TheTVDBSearchResult(name, id);
 		} catch (FileNotFoundException e) {
 			// illegal series id
 			Logger.getLogger(getClass().getName()).log(Level.WARNING, "Failed to retrieve base series record", e);
 			return null;
 		}
 	}
 	@Override
 	public URI getEpisodeListLink(SearchResult searchResult) {
 		int seriesId = ((TheTVDBSearchResult) searchResult).getSeriesId();