+ try to auto-detect name from imdb/thetvdb ID if possible

2011-12-12 14:06:26 +00:00 · 2011-12-12 14:06:26 +00:00 · c1ed273158
parent c37c38c2c7
commit c1ed273158
4 changed files with 133 additions and 60 deletions
--- a/source/net/sourceforge/filebot/cli/CmdlineOperations.java
+++ b/source/net/sourceforge/filebot/cli/CmdlineOperations.java
@ -95,12 +95,12 @@ public class CmdlineOperations implements CmdlineInterface {
 		int cws = 0; // common word sequence
 		double max = mediaFiles.size();
 		
+		SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
 		Collection<String> cwsList = emptySet();
 		if (max >= 5) {
-			cwsList = detectSeriesNames(mediaFiles);
+			cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
 		}
 		
-		SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
 		for (File f : mediaFiles) {
 			// count SxE matches
 			if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) {
@ -306,7 +306,7 @@ public class CmdlineOperations implements CmdlineInterface {
 		for (File subtitleFile : subtitleFiles) {
 			// check if subtitle corresponds to a movie file (same name, different extension)
 			for (int i = 0; i < movieDescriptors.length; i++) {
-				if (movieDescriptors != null) {
+				if (movieDescriptors[i] != null) {
 					if (isDerived(subtitleFile, movieFiles[i])) {
 						File movieDestination = renameMap.get(movieFiles[i]);
 						File subtitleDestination = new File(movieDestination.getParentFile(), getName(movieDestination) + "." + getExtension(subtitleFile));
@ -568,21 +568,9 @@ public class CmdlineOperations implements CmdlineInterface {
 	}
 	
 	
-	private Collection<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception {
-		Collection<String> names = new LinkedHashSet<String>();
-		
-		// detect by imdb id from nfo file in the same folder
-		for (List<File> file : mapByFolder(mediaFiles).values()) {
-			for (int imdbid : grepImdbIdFor(file.get(0))) {
-				Movie movie = WebServices.TMDb.getMovieDescriptor(imdbid, Locale.ENGLISH);
-				if (movie != null) {
-					names.add(movie.getName());
-				}
-			}
-		}
-		
+	private List<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception {
 		// detect series name by common word sequence
-		names.addAll(detectSeriesNames(mediaFiles));
+		List<String> names = detectSeriesNames(mediaFiles);
 		
 		if (names.isEmpty() || (strict && names.size() > 1)) {
 			throw new Exception("Unable to auto-select query: " + names);
--- a/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java
+++ b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java
@ -5,55 +5,125 @@ package net.sourceforge.filebot.mediainfo;
 import static java.util.ResourceBundle.*;
 import static java.util.concurrent.TimeUnit.*;
 import static java.util.regex.Pattern.*;
+import static net.sourceforge.tuned.FileUtilities.*;
 import static net.sourceforge.tuned.StringUtilities.*;

 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Scanner;
+import java.util.Locale;
+import java.util.Map;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import net.sourceforge.filebot.MediaTypes;
+import net.sourceforge.filebot.WebServices;
 import net.sourceforge.filebot.similarity.SeriesNameMatcher;
 import net.sourceforge.filebot.web.CachedResource;
+import net.sourceforge.filebot.web.Movie;
+import net.sourceforge.filebot.web.SearchResult;
+import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;


 public class ReleaseInfo {
 	
-	public static Collection<String> detectSeriesNames(Collection<File> files) throws IOException {
-		SeriesNameMatcher matcher = new SeriesNameMatcher();
-		ReleaseInfo cleaner = new ReleaseInfo();
+	public static List<String> detectSeriesNames(Collection<File> files) throws Exception {
+		ReleaseInfo releaseInfo = new ReleaseInfo();
+		
+		// don't allow duplicates
+		Map<String, String> names = new LinkedHashMap<String, String>();
+		
+		for (SearchResult it : releaseInfo.lookupNameByInfoFile(files, Locale.ENGLISH)) {
+			names.put(it.getName().toLowerCase(), it.getName());
+		}
 		
 		// match common word sequence and clean detected word sequence from unwanted elements
-		Collection<String> names = matcher.matchAll(files.toArray(new File[files.size()]));
-		return new LinkedHashSet<String>(cleaner.cleanRG(names));
+		Collection<String> matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
+		for (String it : releaseInfo.cleanRG(matches)) {
+			names.put(it.toLowerCase(), it);
+		}
+		
+		return new ArrayList<String>(names.values());
 	}
 	
 	
-	public static Set<Integer> grepImdbIdFor(File movieFile) throws IOException {
+	public static Set<Integer> grepImdbIdFor(File file) throws Exception {
+		ReleaseInfo releaseInfo = new ReleaseInfo();
 		Set<Integer> collection = new LinkedHashSet<Integer>();
-		File movieFolder = movieFile.getParentFile(); // lookup imdb id from nfo files in this folder
 		
-		for (File file : movieFolder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
-			Scanner scanner = new Scanner(new FileInputStream(file), "UTF-8");
-			
-			try {
-				// scan for imdb id patterns like tt1234567
-				String imdb = null;
-				
-				while ((imdb = scanner.findWithinHorizon("(?<=tt)\\d{7}", 64 * 1024)) != null) {
-					collection.add(Integer.parseInt(imdb));
+		for (File nfo : file.getParentFile().listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
+			String text = new String(readFile(nfo), "UTF-8");
+			collection.addAll(releaseInfo.grepImdbId(text));
 		}
-			} finally {
-				scanner.close();
+		
+		return collection;
+	}
+	
+	
+	/**
+	 * Look up movie and series names via the IMDb and TheTVDB ids that are mentioned in the
+	 * nfo files found in the folders of the given files.
+	 *
+	 * @param files files whose folders (as grouped by <code>mapByFolder</code>) are searched for nfo files
+	 * @param language language that is passed on to the web service lookups
+	 * @return the non-null lookup results as a set, in the order they were found
+	 * @throws Exception if reading an nfo file or a web service lookup fails
+	 */
+	public Set<SearchResult> lookupNameByInfoFile(Collection<File> files, Locale language) throws Exception {
+		Set<SearchResult> names = new LinkedHashSet<SearchResult>();
+		
+		// search for id in sibling nfo files
+		for (File folder : mapByFolder(files).keySet()) {
+			File[] nfoFiles = folder.listFiles(MediaTypes.getDefaultFilter("application/nfo"));
+			if (nfoFiles == null) {
+				// listFiles yields null if the folder can't be listed (not a directory or I/O error)
+				continue;
+			}
+			
+			for (File nfo : nfoFiles) {
+				String text = new String(readFile(nfo), "UTF-8");
+				
+				for (int imdbid : grepImdbId(text)) {
+					Movie movie = WebServices.OpenSubtitles.getMovieDescriptor(imdbid, language); // movies and tv shows
+					if (movie != null) {
+						names.add(movie);
+					}
+				}
+				
+				for (int tvdbid : grepTheTvdbId(text)) {
+					TheTVDBSearchResult series = WebServices.TheTVDB.lookup(tvdbid, language); // just tv shows
+					if (series != null) {
+						names.add(series);
+					}
+				}
+			}
+		}
+		
+		return names;
+	}
+	
+	
+	/**
+	 * Collect all IMDb ids that occur in the given text.
+	 *
+	 * @param text text to scan for imdb id patterns like tt1234567
+	 * @return the numeric part of each id found, without duplicates, in order of first occurrence
+	 */
+	public Set<Integer> grepImdbId(CharSequence text) {
+		// exactly seven digits that directly follow the "tt" prefix
+		Matcher idMatcher = Pattern.compile("(?<=tt)\\d{7}").matcher(text);
+		Set<Integer> ids = new LinkedHashSet<Integer>();
+		
+		for (boolean found = idMatcher.find(); found; found = idMatcher.find()) {
+			String digits = idMatcher.group();
+			ids.add(Integer.valueOf(digits));
+		}
+		
+		return ids;
+	}
+	
+	
+	public Set<Integer> grepTheTvdbId(CharSequence text) {
+		// scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14
+		Set<Integer> collection = new LinkedHashSet<Integer>();
+		for (String token : Pattern.compile("[\\s\"<>|]+").split(text)) {
+			try {
+				URL url = new URL(token);
+				if (url.getHost().contains("thetvdb")) {
+					Matcher idMatch = Pattern.compile("(?<=(^|\\W)id=)\\d+").matcher(url.getQuery());
+					while (idMatch.find()) {
+						collection.add(Integer.parseInt(idMatch.group()));
+					}
+				}
+			} catch (MalformedURLException e) {
+				// parse for thetvdb urls, ignore everything else
 			}
 		}
 		
--- a/source/net/sourceforge/filebot/web/TheTVDBClient.java
+++ b/source/net/sourceforge/filebot/web/TheTVDBClient.java
@ -184,6 +184,21 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
 	}
 	
 	
+	/**
+	 * Fetch the base series record for the given TheTVDB series id and wrap the series name
+	 * selected from it (<code>//SeriesName</code>) in a search result.
+	 *
+	 * @param id TheTVDB series id
+	 * @param language language whose code is used to select the record file
+	 * @return search result for the series, or <code>null</code> if no record exists for the id
+	 * @throws Exception if the record can't be retrieved or parsed for any other reason
+	 */
+	public TheTVDBSearchResult lookup(int id, Locale language) throws Exception {
+		try {
+			URL baseRecordLocation = getResource(MirrorType.XML, "/api/" + apikey + "/series/" + id + "/all/" + language.getLanguage() + ".xml");
+			
+			// close the stream ourselves, the parser API doesn't promise to do it for us
+			java.io.InputStream in = baseRecordLocation.openStream();
+			try {
+				Document baseRecord = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
+				
+				String name = selectString("//SeriesName", baseRecord);
+				return new TheTVDBSearchResult(name, id);
+			} finally {
+				in.close();
+			}
+		} catch (FileNotFoundException e) {
+			// illegal series id
+			Logger.getLogger(getClass().getName()).log(Level.WARNING, "Failed to retrieve base series record", e);
+			return null;
+		}
+	}
+	
+	
 	@Override
 	public URI getEpisodeListLink(SearchResult searchResult) {
 		int seriesId = ((TheTVDBSearchResult) searchResult).getSeriesId();