+ try to auto-detect name from imdb/thetvdb ID if possible

This commit is contained in:
Reinhard Pointner 2011-12-12 14:06:26 +00:00
parent c37c38c2c7
commit c1ed273158
4 changed files with 133 additions and 60 deletions

View File

@ -95,12 +95,12 @@ public class CmdlineOperations implements CmdlineInterface {
int cws = 0; // common word sequence int cws = 0; // common word sequence
double max = mediaFiles.size(); double max = mediaFiles.size();
SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
Collection<String> cwsList = emptySet(); Collection<String> cwsList = emptySet();
if (max >= 5) { if (max >= 5) {
cwsList = detectSeriesNames(mediaFiles); cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
} }
SeriesNameMatcher nameMatcher = new SeriesNameMatcher();
for (File f : mediaFiles) { for (File f : mediaFiles) {
// count SxE matches // count SxE matches
if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) { if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) {
@ -306,7 +306,7 @@ public class CmdlineOperations implements CmdlineInterface {
for (File subtitleFile : subtitleFiles) { for (File subtitleFile : subtitleFiles) {
// check if subtitle corresponds to a movie file (same name, different extension) // check if subtitle corresponds to a movie file (same name, different extension)
for (int i = 0; i < movieDescriptors.length; i++) { for (int i = 0; i < movieDescriptors.length; i++) {
if (movieDescriptors != null) { if (movieDescriptors[i] != null) {
if (isDerived(subtitleFile, movieFiles[i])) { if (isDerived(subtitleFile, movieFiles[i])) {
File movieDestination = renameMap.get(movieFiles[i]); File movieDestination = renameMap.get(movieFiles[i]);
File subtitleDestination = new File(movieDestination.getParentFile(), getName(movieDestination) + "." + getExtension(subtitleFile)); File subtitleDestination = new File(movieDestination.getParentFile(), getName(movieDestination) + "." + getExtension(subtitleFile));
@ -568,21 +568,9 @@ public class CmdlineOperations implements CmdlineInterface {
} }
private Collection<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception { private List<String> detectQuery(Collection<File> mediaFiles, boolean strict) throws Exception {
Collection<String> names = new LinkedHashSet<String>();
// detect by imdb id from nfo file in the same folder
for (List<File> file : mapByFolder(mediaFiles).values()) {
for (int imdbid : grepImdbIdFor(file.get(0))) {
Movie movie = WebServices.TMDb.getMovieDescriptor(imdbid, Locale.ENGLISH);
if (movie != null) {
names.add(movie.getName());
}
}
}
// detect series name by common word sequence // detect series name by common word sequence
names.addAll(detectSeriesNames(mediaFiles)); List<String> names = detectSeriesNames(mediaFiles);
if (names.isEmpty() || (strict && names.size() > 1)) { if (names.isEmpty() || (strict && names.size() > 1)) {
throw new Exception("Unable to auto-select query: " + names); throw new Exception("Unable to auto-select query: " + names);

View File

@ -5,55 +5,125 @@ package net.sourceforge.filebot.mediainfo;
import static java.util.ResourceBundle.*; import static java.util.ResourceBundle.*;
import static java.util.concurrent.TimeUnit.*; import static java.util.concurrent.TimeUnit.*;
import static java.util.regex.Pattern.*; import static java.util.regex.Pattern.*;
import static net.sourceforge.tuned.FileUtilities.*;
import static net.sourceforge.tuned.StringUtilities.*; import static net.sourceforge.tuned.StringUtilities.*;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Scanner; import java.util.Locale;
import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.sourceforge.filebot.MediaTypes; import net.sourceforge.filebot.MediaTypes;
import net.sourceforge.filebot.WebServices;
import net.sourceforge.filebot.similarity.SeriesNameMatcher; import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.web.CachedResource; import net.sourceforge.filebot.web.CachedResource;
import net.sourceforge.filebot.web.Movie;
import net.sourceforge.filebot.web.SearchResult;
import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
public class ReleaseInfo { public class ReleaseInfo {
public static Collection<String> detectSeriesNames(Collection<File> files) throws IOException { public static List<String> detectSeriesNames(Collection<File> files) throws Exception {
SeriesNameMatcher matcher = new SeriesNameMatcher(); ReleaseInfo releaseInfo = new ReleaseInfo();
ReleaseInfo cleaner = new ReleaseInfo();
// don't allow duplicates
Map<String, String> names = new LinkedHashMap<String, String>();
for (SearchResult it : releaseInfo.lookupNameByInfoFile(files, Locale.ENGLISH)) {
names.put(it.getName().toLowerCase(), it.getName());
}
// match common word sequence and clean detected word sequence from unwanted elements // match common word sequence and clean detected word sequence from unwanted elements
Collection<String> names = matcher.matchAll(files.toArray(new File[files.size()])); Collection<String> matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
return new LinkedHashSet<String>(cleaner.cleanRG(names)); for (String it : releaseInfo.cleanRG(matches)) {
names.put(it.toLowerCase(), it);
}
return new ArrayList<String>(names.values());
} }
public static Set<Integer> grepImdbIdFor(File movieFile) throws IOException { public static Set<Integer> grepImdbIdFor(File file) throws Exception {
ReleaseInfo releaseInfo = new ReleaseInfo();
Set<Integer> collection = new LinkedHashSet<Integer>(); Set<Integer> collection = new LinkedHashSet<Integer>();
File movieFolder = movieFile.getParentFile(); // lookup imdb id from nfo files in this folder
for (File file : movieFolder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) { for (File nfo : file.getParentFile().listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
Scanner scanner = new Scanner(new FileInputStream(file), "UTF-8"); String text = new String(readFile(nfo), "UTF-8");
collection.addAll(releaseInfo.grepImdbId(text));
}
try { return collection;
// scan for imdb id patterns like tt1234567 }
String imdb = null;
while ((imdb = scanner.findWithinHorizon("(?<=tt)\\d{7}", 64 * 1024)) != null) {
collection.add(Integer.parseInt(imdb)); public Set<SearchResult> lookupNameByInfoFile(Collection<File> files, Locale language) throws Exception {
Set<SearchResult> names = new LinkedHashSet<SearchResult>();
// search for id in sibling nfo files
for (File folder : mapByFolder(files).keySet()) {
for (File nfo : folder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
String text = new String(readFile(nfo), "UTF-8");
for (int imdbid : grepImdbId(text)) {
Movie movie = WebServices.OpenSubtitles.getMovieDescriptor(imdbid, language); // movies and tv shows
if (movie != null) {
names.add(movie);
}
} }
} finally {
scanner.close(); for (int tvdbid : grepTheTvdbId(text)) {
TheTVDBSearchResult series = WebServices.TheTVDB.lookup(tvdbid, language); // just tv shows
if (series != null) {
names.add(series);
}
}
}
}
return names;
}
// imdb ids look like tt1234567; newer titles carry 8 digits (e.g. tt10872600)
private static final Pattern IMDB_ID_PATTERN = Pattern.compile("(?<=tt)\\d{7,8}");

/**
 * Scan the given text for imdb id patterns like tt1234567.
 * 
 * @param text text to search, e.g. the content of an nfo file
 * @return the numeric part of every id found (leading zeros dropped), without
 *         duplicates and in order of first occurrence; empty if nothing matches
 */
public Set<Integer> grepImdbId(CharSequence text) {
	Set<Integer> collection = new LinkedHashSet<Integer>();
	
	// pattern is compiled once, 8 digits still fit into an int
	Matcher imdbMatch = IMDB_ID_PATTERN.matcher(text);
	while (imdbMatch.find()) {
		collection.add(Integer.parseInt(imdbMatch.group()));
	}
	
	return collection;
}
public Set<Integer> grepTheTvdbId(CharSequence text) {
// scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14
Set<Integer> collection = new LinkedHashSet<Integer>();
for (String token : Pattern.compile("[\\s\"<>|]+").split(text)) {
try {
URL url = new URL(token);
if (url.getHost().contains("thetvdb")) {
Matcher idMatch = Pattern.compile("(?<=(^|\\W)id=)\\d+").matcher(url.getQuery());
while (idMatch.find()) {
collection.add(Integer.parseInt(idMatch.group()));
}
}
} catch (MalformedURLException e) {
// parse for thetvdb urls, ignore everything else
} }
} }

View File

@ -200,8 +200,8 @@ public class SeriesNameMatcher {
protected String normalize(String name) { protected String normalize(String name) {
// remove group names and checksums, any [...] or (...) // remove group names and checksums, any [...] or (...)
name = name.replaceAll("\\([^\\(]*\\)", ""); name = name.replaceAll("\\([^\\(]*\\)", " ");
name = name.replaceAll("\\[[^\\[]*\\]", ""); name = name.replaceAll("\\[[^\\[]*\\]", " ");
// remove/normalize special characters // remove/normalize special characters
name = name.replaceAll("['`´]+", ""); name = name.replaceAll("['`´]+", "");

View File

@ -184,6 +184,21 @@ public class TheTVDBClient extends AbstractEpisodeListProvider {
} }
/**
 * Look up a series by its TheTVDB id by fetching the base series record
 * for the given language and reading the series name from it.
 * 
 * @param id thetvdb series id
 * @param language language of the requested series record
 * @return search result holding the series name and the given id, or
 *         <code>null</code> if the record could not be found
 * @throws Exception if the record cannot be retrieved or parsed for any reason
 *         other than the resource not being found
 */
public TheTVDBSearchResult lookup(int id, Locale language) throws Exception {
	try {
		URL baseRecordLocation = getResource(MirrorType.XML, "/api/" + apikey + "/series/" + id + "/all/" + language.getLanguage() + ".xml");
		
		// make sure the stream is released even if parsing fails
		java.io.InputStream in = baseRecordLocation.openStream();
		try {
			Document baseRecord = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
			
			String name = selectString("//SeriesName", baseRecord);
			return new TheTVDBSearchResult(name, id);
		} finally {
			in.close();
		}
	} catch (FileNotFoundException e) {
		// illegal series id
		Logger.getLogger(getClass().getName()).log(Level.WARNING, "Failed to retrieve base series record", e);
		return null;
	}
}
@Override @Override
public URI getEpisodeListLink(SearchResult searchResult) { public URI getEpisodeListLink(SearchResult searchResult) {
int seriesId = ((TheTVDBSearchResult) searchResult).getSeriesId(); int seriesId = ((TheTVDBSearchResult) searchResult).getSeriesId();