* auto-determine optimal series/files match sets (combine all files per show)
* fine-tune name similarity metric to 5 separation ranks
This commit is contained in:
parent
3668b02ed5
commit
fe74476232
|
@ -8,7 +8,7 @@ import static net.sourceforge.filebot.MediaTypes.*;
|
|||
import static net.sourceforge.filebot.WebServices.*;
|
||||
import static net.sourceforge.filebot.cli.CLILogging.*;
|
||||
import static net.sourceforge.filebot.hash.VerificationUtilities.*;
|
||||
import static net.sourceforge.filebot.mediainfo.ReleaseInfo.*;
|
||||
import static net.sourceforge.filebot.mediainfo.MediaDetection.*;
|
||||
import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
|
||||
|
|
|
@ -140,7 +140,7 @@ def parseDate(path) {
|
|||
}
|
||||
|
||||
// Detect the most likely series name for the given files (video/subtitle files only);
// returns null when no name could be detected.
def detectSeriesName(files) {
	def names = MediaDetection.detectSeriesNames(files.findAll { it.isVideo() || it.isSubtitle() })
	return names == null || names.isEmpty() ? null : names[0]
}
|
||||
|
||||
|
|
|
@ -0,0 +1,205 @@
|
|||
|
||||
package net.sourceforge.filebot.mediainfo;
|
||||
|
||||
|
||||
import static net.sourceforge.filebot.MediaTypes.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.sourceforge.filebot.MediaTypes;
|
||||
import net.sourceforge.filebot.WebServices;
|
||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||
import net.sourceforge.filebot.web.SearchResult;
|
||||
import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
|
||||
|
||||
|
||||
public class MediaDetection {
|
||||
|
||||
public static Map<Set<File>, Set<String>> mapFoldersBySeriesNames(Collection<File> files) throws Exception {
|
||||
SortedMap<File, List<File>> filesByFolder = mapByFolder(filter(files, VIDEO_FILES, SUBTITLE_FILES));
|
||||
|
||||
// map series names by folder
|
||||
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
|
||||
|
||||
for (Entry<File, List<File>> it : filesByFolder.entrySet()) {
|
||||
Set<String> namesForFolder = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||
namesForFolder.addAll(detectSeriesNames(it.getValue()));
|
||||
|
||||
seriesNamesByFolder.put(it.getKey(), namesForFolder);
|
||||
}
|
||||
|
||||
// reverse map folders by series name
|
||||
Map<String, Set<File>> foldersBySeriesName = new HashMap<String, Set<File>>();
|
||||
|
||||
for (Set<String> nameSet : seriesNamesByFolder.values()) {
|
||||
for (String name : nameSet) {
|
||||
Set<File> foldersForSeries = new HashSet<File>();
|
||||
for (Entry<File, Set<String>> it : seriesNamesByFolder.entrySet()) {
|
||||
if (it.getValue().contains(name)) {
|
||||
foldersForSeries.add(it.getKey());
|
||||
}
|
||||
}
|
||||
foldersBySeriesName.put(name, foldersForSeries);
|
||||
}
|
||||
}
|
||||
|
||||
// join both sets
|
||||
Map<Set<File>, Set<String>> matchSets = new HashMap<Set<File>, Set<String>>();
|
||||
|
||||
while (seriesNamesByFolder.size() > 0) {
|
||||
Set<String> combinedNameSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||
Set<File> combinedFolderSet = new HashSet<File>();
|
||||
|
||||
// build combined match set
|
||||
combinedFolderSet.add(seriesNamesByFolder.keySet().iterator().next());
|
||||
|
||||
boolean resolveFurther = true;
|
||||
while (resolveFurther) {
|
||||
boolean modified = false;
|
||||
for (File folder : combinedFolderSet) {
|
||||
modified |= combinedNameSet.addAll(seriesNamesByFolder.get(folder));
|
||||
}
|
||||
for (String name : combinedNameSet) {
|
||||
modified |= combinedFolderSet.addAll(foldersBySeriesName.get(name));
|
||||
}
|
||||
resolveFurther &= modified;
|
||||
}
|
||||
|
||||
// build result entry
|
||||
Set<File> combinedFileSet = new TreeSet<File>();
|
||||
for (File folder : combinedFolderSet) {
|
||||
combinedFileSet.addAll(filesByFolder.get(folder));
|
||||
}
|
||||
matchSets.put(combinedFileSet, combinedNameSet);
|
||||
|
||||
// set folders as accounted for
|
||||
seriesNamesByFolder.keySet().removeAll(combinedFolderSet);
|
||||
}
|
||||
|
||||
return matchSets;
|
||||
}
|
||||
|
||||
|
||||
public static List<String> detectSeriesNames(Collection<File> files) throws Exception {
|
||||
// don't allow duplicates
|
||||
Map<String, String> names = new LinkedHashMap<String, String>();
|
||||
|
||||
try {
|
||||
for (SearchResult it : lookupSeriesNameByInfoFile(files, Locale.ENGLISH)) {
|
||||
names.put(it.getName().toLowerCase(), it.getName());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to lookup info by id: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
// match common word sequence and clean detected word sequence from unwanted elements
|
||||
Collection<String> matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
|
||||
|
||||
try {
|
||||
matches = new ReleaseInfo().cleanRG(matches);
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to clean matches: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
for (String it : matches) {
|
||||
names.put(it.toLowerCase(), it);
|
||||
}
|
||||
|
||||
return new ArrayList<String>(names.values());
|
||||
}
|
||||
|
||||
|
||||
public static Set<Integer> grepImdbIdFor(File file) throws Exception {
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
|
||||
for (File nfo : file.getParentFile().listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
|
||||
String text = new String(readFile(nfo), "UTF-8");
|
||||
collection.addAll(grepImdbId(text));
|
||||
}
|
||||
|
||||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public static Set<SearchResult> lookupSeriesNameByInfoFile(Collection<File> files, Locale language) throws Exception {
|
||||
Set<SearchResult> names = new LinkedHashSet<SearchResult>();
|
||||
|
||||
// search for id in sibling nfo files
|
||||
for (File folder : mapByFolder(files).keySet()) {
|
||||
for (File nfo : folder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
|
||||
String text = new String(readFile(nfo), "UTF-8");
|
||||
|
||||
for (int imdbid : grepImdbId(text)) {
|
||||
TheTVDBSearchResult series = WebServices.TheTVDB.lookupByIMDbID(imdbid, language);
|
||||
if (series != null) {
|
||||
names.add(series);
|
||||
}
|
||||
}
|
||||
|
||||
for (int tvdbid : grepTheTvdbId(text)) {
|
||||
TheTVDBSearchResult series = WebServices.TheTVDB.lookupByID(tvdbid, language);
|
||||
if (series != null) {
|
||||
names.add(series);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return names;
|
||||
}
|
||||
|
||||
|
||||
public static Set<Integer> grepImdbId(CharSequence text) {
|
||||
// scan for imdb id patterns like tt1234567
|
||||
Matcher imdbMatch = Pattern.compile("(?<=tt)\\d{7}").matcher(text);
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
|
||||
while (imdbMatch.find()) {
|
||||
collection.add(Integer.parseInt(imdbMatch.group()));
|
||||
}
|
||||
|
||||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public static Set<Integer> grepTheTvdbId(CharSequence text) {
|
||||
// scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
for (String token : Pattern.compile("[\\s\"<>|]+").split(text)) {
|
||||
try {
|
||||
URL url = new URL(token);
|
||||
if (url.getHost().contains("thetvdb") && url.getQuery() != null) {
|
||||
Matcher idMatch = Pattern.compile("(?<=(^|\\W)id=)\\d+").matcher(url.getQuery());
|
||||
while (idMatch.find()) {
|
||||
collection.add(Integer.parseInt(idMatch.group()));
|
||||
}
|
||||
}
|
||||
} catch (MalformedURLException e) {
|
||||
// parse for thetvdb urls, ignore everything else
|
||||
}
|
||||
}
|
||||
|
||||
return collection;
|
||||
}
|
||||
|
||||
}
|
|
@ -5,144 +5,22 @@ package net.sourceforge.filebot.mediainfo;
|
|||
import static java.util.ResourceBundle.*;
|
||||
import static java.util.concurrent.TimeUnit.*;
|
||||
import static java.util.regex.Pattern.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
import static net.sourceforge.tuned.StringUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.sourceforge.filebot.MediaTypes;
|
||||
import net.sourceforge.filebot.WebServices;
|
||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||
import net.sourceforge.filebot.web.CachedResource;
|
||||
import net.sourceforge.filebot.web.SearchResult;
|
||||
import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
|
||||
|
||||
|
||||
public class ReleaseInfo {
|
||||
|
||||
public static List<String> detectSeriesNames(Collection<File> files) throws Exception {
|
||||
ReleaseInfo releaseInfo = new ReleaseInfo();
|
||||
|
||||
// don't allow duplicates
|
||||
Map<String, String> names = new LinkedHashMap<String, String>();
|
||||
|
||||
try {
|
||||
for (SearchResult it : releaseInfo.lookupSeriesNameByInfoFile(files, Locale.ENGLISH)) {
|
||||
names.put(it.getName().toLowerCase(), it.getName());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(ReleaseInfo.class.getClass().getName()).log(Level.WARNING, "Failed to lookup info by id: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
// match common word sequence and clean detected word sequence from unwanted elements
|
||||
Collection<String> matches = new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
|
||||
|
||||
try {
|
||||
matches = releaseInfo.cleanRG(matches);
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(ReleaseInfo.class.getClass().getName()).log(Level.WARNING, "Failed to clean matches: " + e.getMessage(), e);
|
||||
}
|
||||
|
||||
for (String it : matches) {
|
||||
names.put(it.toLowerCase(), it);
|
||||
}
|
||||
|
||||
return new ArrayList<String>(names.values());
|
||||
}
|
||||
|
||||
|
||||
public static Set<Integer> grepImdbIdFor(File file) throws Exception {
|
||||
ReleaseInfo releaseInfo = new ReleaseInfo();
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
|
||||
for (File nfo : file.getParentFile().listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
|
||||
String text = new String(readFile(nfo), "UTF-8");
|
||||
collection.addAll(releaseInfo.grepImdbId(text));
|
||||
}
|
||||
|
||||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public Set<SearchResult> lookupSeriesNameByInfoFile(Collection<File> files, Locale language) throws Exception {
|
||||
Set<SearchResult> names = new LinkedHashSet<SearchResult>();
|
||||
|
||||
// search for id in sibling nfo files
|
||||
for (File folder : mapByFolder(files).keySet()) {
|
||||
for (File nfo : folder.listFiles(MediaTypes.getDefaultFilter("application/nfo"))) {
|
||||
String text = new String(readFile(nfo), "UTF-8");
|
||||
|
||||
for (int imdbid : grepImdbId(text)) {
|
||||
TheTVDBSearchResult series = WebServices.TheTVDB.lookupByIMDbID(imdbid, language);
|
||||
if (series != null) {
|
||||
names.add(series);
|
||||
}
|
||||
}
|
||||
|
||||
for (int tvdbid : grepTheTvdbId(text)) {
|
||||
TheTVDBSearchResult series = WebServices.TheTVDB.lookupByID(tvdbid, language);
|
||||
if (series != null) {
|
||||
names.add(series);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return names;
|
||||
}
|
||||
|
||||
|
||||
public Set<Integer> grepImdbId(CharSequence text) {
|
||||
// scan for imdb id patterns like tt1234567
|
||||
Matcher imdbMatch = Pattern.compile("(?<=tt)\\d{7}").matcher(text);
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
|
||||
while (imdbMatch.find()) {
|
||||
collection.add(Integer.parseInt(imdbMatch.group()));
|
||||
}
|
||||
|
||||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public Set<Integer> grepTheTvdbId(CharSequence text) {
|
||||
// scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
for (String token : Pattern.compile("[\\s\"<>|]+").split(text)) {
|
||||
try {
|
||||
URL url = new URL(token);
|
||||
if (url.getHost().contains("thetvdb") && url.getQuery() != null) {
|
||||
Matcher idMatch = Pattern.compile("(?<=(^|\\W)id=)\\d+").matcher(url.getQuery());
|
||||
while (idMatch.find()) {
|
||||
collection.add(Integer.parseInt(idMatch.group()));
|
||||
}
|
||||
}
|
||||
} catch (MalformedURLException e) {
|
||||
// parse for thetvdb urls, ignore everything else
|
||||
}
|
||||
}
|
||||
|
||||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public String getVideoSource(File file) {
|
||||
// check parent and itself for group names
|
||||
return matchLast(getVideoSourcePattern(), file.getParent(), file.getName());
|
||||
|
|
|
@ -179,9 +179,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
|
||||
@Override
|
||||
public float getSimilarity(Object o1, Object o2) {
|
||||
// normalize absolute similarity to similarity rank (6 ranks in total),
|
||||
// normalize absolute similarity to similarity rank (5 ranks in total),
|
||||
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
||||
return (float) (floor(super.getSimilarity(o1, o2) * 6) / 6);
|
||||
return (float) (floor(super.getSimilarity(o1, o2) * 5) / 5);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ package net.sourceforge.filebot.ui.rename;
|
|||
|
||||
import static java.util.Collections.*;
|
||||
import static net.sourceforge.filebot.MediaTypes.*;
|
||||
import static net.sourceforge.filebot.mediainfo.ReleaseInfo.*;
|
||||
import static net.sourceforge.filebot.mediainfo.MediaDetection.*;
|
||||
import static net.sourceforge.filebot.web.EpisodeUtilities.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
import static net.sourceforge.tuned.ui.TunedUtilities.*;
|
||||
|
@ -20,7 +20,6 @@ import java.util.LinkedHashSet;
|
|||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
|
@ -44,7 +43,6 @@ import net.sourceforge.filebot.ui.SelectDialog;
|
|||
import net.sourceforge.filebot.web.Episode;
|
||||
import net.sourceforge.filebot.web.EpisodeListProvider;
|
||||
import net.sourceforge.filebot.web.SearchResult;
|
||||
import net.sourceforge.tuned.FileUtilities;
|
||||
|
||||
|
||||
class EpisodeListMatcher implements AutoCompleteMatcher {
|
||||
|
@ -169,24 +167,18 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
|||
@Override
|
||||
public List<Match<File, ?>> match(final List<File> files, final Locale locale, final boolean autodetection, final Component parent) throws Exception {
|
||||
// focus on movie and subtitle files
|
||||
final List<File> mediaFiles = FileUtilities.filter(files, VIDEO_FILES, SUBTITLE_FILES);
|
||||
final Map<File, List<File>> filesByFolder = mapByFolder(mediaFiles);
|
||||
|
||||
// do matching all at once
|
||||
if (filesByFolder.keySet().size() <= 5 || detectSeriesNames(mediaFiles).size() <= 5) {
|
||||
return matchEpisodeSet(mediaFiles, locale, autodetection, parent);
|
||||
}
|
||||
final List<File> mediaFiles = filter(files, VIDEO_FILES, SUBTITLE_FILES);;
|
||||
|
||||
// assume that many shows will be matched, do it folder by folder
|
||||
List<Callable<List<Match<File, ?>>>> taskPerFolder = new ArrayList<Callable<List<Match<File, ?>>>>();
|
||||
|
||||
// detect series names and create episode list fetch tasks
|
||||
for (final List<File> folder : filesByFolder.values()) {
|
||||
for (final Set<File> folder : mapFoldersBySeriesNames(mediaFiles).keySet()) {
|
||||
taskPerFolder.add(new Callable<List<Match<File, ?>>>() {
|
||||
|
||||
@Override
|
||||
public List<Match<File, ?>> call() throws Exception {
|
||||
return matchEpisodeSet(folder, locale, autodetection, parent);
|
||||
return matchEpisodeSet(new ArrayList<File>(folder), locale, autodetection, parent);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ package net.sourceforge.filebot.ui.rename;
|
|||
import static java.util.Arrays.*;
|
||||
import static java.util.Collections.*;
|
||||
import static net.sourceforge.filebot.MediaTypes.*;
|
||||
import static net.sourceforge.filebot.mediainfo.ReleaseInfo.*;
|
||||
import static net.sourceforge.filebot.mediainfo.MediaDetection.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
import static net.sourceforge.tuned.ui.TunedUtilities.*;
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ package net.sourceforge.filebot.ui.subtitle;
|
|||
|
||||
import static javax.swing.BorderFactory.*;
|
||||
import static javax.swing.JOptionPane.*;
|
||||
import static net.sourceforge.filebot.mediainfo.ReleaseInfo.*;
|
||||
import static net.sourceforge.filebot.mediainfo.MediaDetection.*;
|
||||
import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
import static net.sourceforge.tuned.StringUtilities.*;
|
||||
|
|
Loading…
Reference in New Issue