diff --git a/source/net/sourceforge/filebot/cli/CmdlineOperations.java b/source/net/sourceforge/filebot/cli/CmdlineOperations.java index f79a1935..bd4d231c 100644 --- a/source/net/sourceforge/filebot/cli/CmdlineOperations.java +++ b/source/net/sourceforge/filebot/cli/CmdlineOperations.java @@ -95,12 +95,12 @@ public class CmdlineOperations implements CmdlineInterface { int cws = 0; // common word sequence double max = mediaFiles.size(); + SeriesNameMatcher nameMatcher = new SeriesNameMatcher(); Collection cwsList = emptySet(); if (max >= 5) { - cwsList = detectSeriesNames(mediaFiles); + cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0])); } - SeriesNameMatcher nameMatcher = new SeriesNameMatcher(); for (File f : mediaFiles) { // count SxE matches if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) { @@ -306,7 +306,7 @@ public class CmdlineOperations implements CmdlineInterface { for (File subtitleFile : subtitleFiles) { // check if subtitle corresponds to a movie file (same name, different extension) for (int i = 0; i < movieDescriptors.length; i++) { - if (movieDescriptors != null) { + if (movieDescriptors[i] != null) { if (isDerived(subtitleFile, movieFiles[i])) { File movieDestination = renameMap.get(movieFiles[i]); File subtitleDestination = new File(movieDestination.getParentFile(), getName(movieDestination) + "." + getExtension(subtitleFile)); @@ -568,21 +568,9 @@ public class CmdlineOperations implements CmdlineInterface { } - private Collection detectQuery(Collection mediaFiles, boolean strict) throws Exception { - Collection names = new LinkedHashSet(); - - // detect by imdb id from nfo file in the same folder - for (List file : mapByFolder(mediaFiles).values()) { - for (int imdbid : grepImdbIdFor(file.get(0))) { - Movie movie = WebServices.TMDb.getMovieDescriptor(imdbid, Locale.ENGLISH); - if (movie != null) { - names.add(movie.getName()); - } - } - } - + private List detectQuery(Collection mediaFiles, boolean strict) throws Exception { // detect series name by common word sequence - names.addAll(detectSeriesNames(mediaFiles)); + List names = detectSeriesNames(mediaFiles); if (names.isEmpty() || (strict && names.size() > 1)) { throw new Exception("Unable to auto-select query: " + names); diff --git a/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java index 6b84053a..b95b4c5b 100644 --- a/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java +++ b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java @@ -5,55 +5,125 @@ package net.sourceforge.filebot.mediainfo; import static java.util.ResourceBundle.*; import static java.util.concurrent.TimeUnit.*; import static java.util.regex.Pattern.*; +import static net.sourceforge.tuned.FileUtilities.*; import static net.sourceforge.tuned.StringUtilities.*; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; -import java.util.Scanner; +import java.util.Locale; +import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import net.sourceforge.filebot.MediaTypes; +import net.sourceforge.filebot.WebServices; import net.sourceforge.filebot.similarity.SeriesNameMatcher; import net.sourceforge.filebot.web.CachedResource; +import net.sourceforge.filebot.web.Movie; +import net.sourceforge.filebot.web.SearchResult; +import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult; public class ReleaseInfo { - public static Collection detectSeriesNames(Collection files) throws IOException { - SeriesNameMatcher matcher = new SeriesNameMatcher(); - ReleaseInfo cleaner = new ReleaseInfo(); + public static List detectSeriesNames(Collection files) throws Exception { + ReleaseInfo releaseInfo = new ReleaseInfo(); + + // don't allow duplicates + Map names = new LinkedHashMap(); + + for (SearchResult it : releaseInfo.lookupNameByInfoFile(files, Locale.ENGLISH)) { + names.put(it.getName().toLowerCase(), it.getName()); + } // match common word sequence and clean detected word sequence from unwanted elements - Collection names = matcher.matchAll(files.toArray(new File[files.size()])); - return new LinkedHashSet(cleaner.cleanRG(names)); + Collection matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()])); + for (String it : releaseInfo.cleanRG(matches)) { + names.put(it.toLowerCase(), it); + } + + return new ArrayList(names.values()); } - public static Set grepImdbIdFor(File movieFile) throws IOException { + public static Set grepImdbIdFor(File file) throws Exception { + ReleaseInfo releaseInfo = new ReleaseInfo(); Set collection = new LinkedHashSet(); - File movieFolder = movieFile.getParentFile(); // lookup imdb id from nfo files in this folder - for (File file : movieFolder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) { - Scanner scanner = new Scanner(new FileInputStream(file), "UTF-8"); - - try { - // scan for imdb id patterns like tt1234567 - String imdb = null; + for (File nfo : file.getParentFile().listFiles(MediaTypes.getDefaultFilter("application/nfo"))) { + String text = new String(readFile(nfo), "UTF-8"); + collection.addAll(releaseInfo.grepImdbId(text)); + } + + return collection; + } + + + public Set lookupNameByInfoFile(Collection files, Locale language) throws Exception { + Set names = new LinkedHashSet(); + + // search for id in sibling nfo files + for (File folder : mapByFolder(files).keySet()) { + for (File nfo : folder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) { + String text = new String(readFile(nfo), "UTF-8"); - while ((imdb = scanner.findWithinHorizon("(?<=tt)\\d{7}", 64 * 1024)) != null) { - collection.add(Integer.parseInt(imdb)); + for (int imdbid : grepImdbId(text)) { + Movie movie = WebServices.OpenSubtitles.getMovieDescriptor(imdbid, language); // movies and tv shows + if (movie != null) { + names.add(movie); + } } - } finally { - scanner.close(); + + for (int tvdbid : grepTheTvdbId(text)) { + TheTVDBSearchResult series = WebServices.TheTVDB.lookup(tvdbid, language); // just tv shows + if (series != null) { + names.add(series); + } + } + } + } + + return names; + } + + + public Set grepImdbId(CharSequence text) { + // scan for imdb id patterns like tt1234567 + Matcher imdbMatch = Pattern.compile("(?<=tt)\\d{7}").matcher(text); + Set collection = new LinkedHashSet(); + + while (imdbMatch.find()) { + collection.add(Integer.parseInt(imdbMatch.group())); + } + + return collection; + } + + + public Set grepTheTvdbId(CharSequence text) { + // scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14 + Set collection = new LinkedHashSet(); + for (String token : Pattern.compile("[\\s\"<>|]+").split(text)) { + try { + URL url = new URL(token); + if (url.getHost().contains("thetvdb")) { + Matcher idMatch = Pattern.compile("(?<=(^|\\W)id=)\\d+").matcher(url.getQuery()); + while (idMatch.find()) { + collection.add(Integer.parseInt(idMatch.group())); + } + } + } catch (MalformedURLException e) { + // parse for thetvdb urls, ignore everything else } } diff --git a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java index 8eacadb5..422ac354 100644 --- a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java +++ b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java @@ -200,8 +200,8 @@ public class SeriesNameMatcher { protected String normalize(String name) { // remove group names and checksums, any [...] or (...) - name = name.replaceAll("\\([^\\(]*\\)", ""); - name = name.replaceAll("\\[[^\\[]*\\]", ""); + name = name.replaceAll("\\([^\\(]*\\)", " "); + name = name.replaceAll("\\[[^\\[]*\\]", " "); // remove/normalize special characters name = name.replaceAll("['`ยด]+", ""); diff --git a/source/net/sourceforge/filebot/web/TheTVDBClient.java b/source/net/sourceforge/filebot/web/TheTVDBClient.java index 7efc2ebf..eaae3d36 100644 --- a/source/net/sourceforge/filebot/web/TheTVDBClient.java +++ b/source/net/sourceforge/filebot/web/TheTVDBClient.java @@ -40,7 +40,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { private final String apikey; - + public TheTVDBClient(String apikey) { if (apikey == null) throw new NullPointerException("apikey must not be null"); @@ -48,37 +48,37 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { this.apikey = apikey; } - + @Override public String getName() { return "TheTVDB"; } - + @Override public Icon getIcon() { return ResourceManager.getIcon("search.thetvdb"); } - + @Override public boolean hasSingleSeasonSupport() { return true; } - + @Override public boolean hasLocaleSupport() { return true; } - + @Override public ResultCache getCache() { return new ResultCache(host, CacheManager.getInstance().getCache("web-datasource")); } - + @Override public List fetchSearchResult(String query, Locale language) throws Exception { // perform online search @@ -100,7 +100,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { return new ArrayList(resultSet.values()); } - + @Override public List fetchEpisodeList(SearchResult searchResult, Locale language) throws Exception { TheTVDBSearchResult series = (TheTVDBSearchResult) searchResult; @@ -160,7 +160,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { return episodes; } - + public Document getSeriesRecord(TheTVDBSearchResult searchResult, Locale language) throws Exception { URL seriesRecord = getResource(MirrorType.ZIP, "/api/" + apikey + "/series/" + searchResult.getSeriesId() + "/all/" + language.getLanguage() + ".zip"); @@ -183,7 +183,22 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { } } - + + public TheTVDBSearchResult lookup(int id, Locale language) throws Exception { + try { + URL baseRecordLocation = getResource(MirrorType.XML, "/api/" + apikey + "/series/" + id + "/all/" + language.getLanguage() + ".xml"); + Document baseRecord = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(baseRecordLocation.openStream()); + + String name = selectString("//SeriesName", baseRecord); + return new TheTVDBSearchResult(name, id); + } catch (FileNotFoundException e) { + // illegal series id + Logger.getLogger(getClass().getName()).log(Level.WARNING, "Failed to retrieve base series record", e); + return null; + } + } + + @Override public URI getEpisodeListLink(SearchResult searchResult) { int seriesId = ((TheTVDBSearchResult) searchResult).getSeriesId(); @@ -191,7 +206,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { return URI.create("http://" + host + "/?tab=seasonall&id=" + seriesId); } - + @Override public URI getEpisodeListLink(SearchResult searchResult, int season) { int seriesId = ((TheTVDBSearchResult) searchResult).getSeriesId(); @@ -210,7 +225,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { return null; } - + protected String getMirror(MirrorType mirrorType) throws Exception { synchronized (mirrors) { if (mirrors.isEmpty()) { @@ -253,7 +268,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { } } - + protected URL getResource(MirrorType mirrorType, String path) throws Exception { // use default server if (mirrorType == null) @@ -263,34 +278,34 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { return new URL(getMirror(mirrorType) + path); } - + public static class TheTVDBSearchResult extends SearchResult { protected int seriesId; - + protected TheTVDBSearchResult() { // used by serializer } - + public TheTVDBSearchResult(String seriesName, int seriesId) { super(seriesName); this.seriesId = seriesId; } - + public int getSeriesId() { return seriesId; } - + @Override public int hashCode() { return seriesId; } - + @Override public boolean equals(Object object) { if (object instanceof TheTVDBSearchResult) { @@ -302,7 +317,7 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { } } - + protected static enum MirrorType { XML(1), BANNER(2), @@ -310,12 +325,12 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { private final int bitMask; - + private MirrorType(int bitMask) { this.bitMask = bitMask; } - + public static EnumSet fromTypeMask(int typeMask) { // initialize enum set with all types EnumSet enumSet = EnumSet.allOf(MirrorType.class);