* improved series lookup for series-name-without-spacing type naming, the worst of all naming styles... e.g. isd-thewalkingdead-s03e12.avi
This commit is contained in:
parent
2793321715
commit
18df1820a7
|
@ -2,6 +2,7 @@
|
||||||
package net.sourceforge.filebot.media;
|
package net.sourceforge.filebot.media;
|
||||||
|
|
||||||
|
|
||||||
|
import static java.util.Arrays.*;
|
||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
import static java.util.regex.Pattern.*;
|
import static java.util.regex.Pattern.*;
|
||||||
import static net.sourceforge.filebot.MediaTypes.*;
|
import static net.sourceforge.filebot.MediaTypes.*;
|
||||||
|
@ -51,6 +52,7 @@ import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
|
||||||
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
|
||||||
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
import net.sourceforge.filebot.similarity.SimilarityComparator;
|
||||||
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
||||||
|
import net.sourceforge.filebot.web.AnidbClient.AnidbSearchResult;
|
||||||
import net.sourceforge.filebot.web.Date;
|
import net.sourceforge.filebot.web.Date;
|
||||||
import net.sourceforge.filebot.web.Episode;
|
import net.sourceforge.filebot.web.Episode;
|
||||||
import net.sourceforge.filebot.web.Movie;
|
import net.sourceforge.filebot.web.Movie;
|
||||||
|
@ -283,7 +285,7 @@ public class MediaDetection {
|
||||||
Set<String> filenames = new LinkedHashSet<String>();
|
Set<String> filenames = new LinkedHashSet<String>();
|
||||||
for (File f : files) {
|
for (File f : files) {
|
||||||
for (int i = 0; i < 3 && f != null; i++, f = f.getParentFile()) {
|
for (int i = 0; i < 3 && f != null; i++, f = f.getParentFile()) {
|
||||||
(i == 0 ? filenames : folders).add(normalizeBrackets(f.getName()));
|
(i == 0 ? filenames : folders).add(normalizeBrackets(getName(f)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -302,6 +304,22 @@ public class MediaDetection {
|
||||||
matches.addAll(matchSeriesByName(filenames, 3));
|
matches.addAll(matchSeriesByName(filenames, 3));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// assume name without spacing will mess up any lookup
|
||||||
|
if (matches.isEmpty()) {
|
||||||
|
// try to narrow down file to series name as best as possible
|
||||||
|
SeriesNameMatcher snm = new SeriesNameMatcher();
|
||||||
|
String[] sns = filenames.toArray(new String[0]);
|
||||||
|
for (int i = 0; i < sns.length; i++) {
|
||||||
|
String sn = snm.matchByEpisodeIdentifier(sns[i]);
|
||||||
|
if (sn != null) {
|
||||||
|
sns[i] = sn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(asList(sns), false), true)) {
|
||||||
|
matches.add(it.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// pass along only valid terms
|
// pass along only valid terms
|
||||||
names.addAll(stripBlacklistedTerms(matches));
|
names.addAll(stripBlacklistedTerms(matches));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -341,15 +359,40 @@ public class MediaDetection {
|
||||||
return matches;
|
return matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static List<Entry<String, SearchResult>> seriesIndex = new ArrayList<Entry<String, SearchResult>>(75000);
|
||||||
|
|
||||||
|
|
||||||
|
public static synchronized List<Entry<String, SearchResult>> getSeriesIndex() throws IOException {
|
||||||
|
if (seriesIndex.isEmpty()) {
|
||||||
|
try {
|
||||||
|
for (TheTVDBSearchResult it : releaseInfo.getTheTVDBIndex()) {
|
||||||
|
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(it.getName()).toLowerCase(), it));
|
||||||
|
}
|
||||||
|
for (AnidbSearchResult it : releaseInfo.getAnidbIndex()) {
|
||||||
|
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(it.getPrimaryTitle()).toLowerCase(), it));
|
||||||
|
if (it.getEnglishTitle() != null) {
|
||||||
|
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(it.getEnglishTitle()).toLowerCase(), it));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
// can't load movie index, just try again next time
|
||||||
|
Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.SEVERE, "Failed to load series index: " + e.getMessage(), e);
|
||||||
|
return emptyList();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return seriesIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static List<String> matchSeriesByName(Collection<String> names, int maxStartIndex) throws Exception {
|
public static List<String> matchSeriesByName(Collection<String> names, int maxStartIndex) throws Exception {
|
||||||
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
|
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
|
||||||
List<String> matches = new ArrayList<String>();
|
List<String> matches = new ArrayList<String>();
|
||||||
|
|
||||||
String[] seriesIndex = releaseInfo.getSeriesList();
|
|
||||||
for (String name : names) {
|
for (String name : names) {
|
||||||
String bestMatch = "";
|
String bestMatch = "";
|
||||||
for (String identifier : seriesIndex) {
|
for (Entry<String, SearchResult> it : getSeriesIndex()) {
|
||||||
|
String identifier = it.getKey();
|
||||||
String commonName = nameMatcher.matchFirstCommonSequence(name, identifier);
|
String commonName = nameMatcher.matchFirstCommonSequence(name, identifier);
|
||||||
if (commonName != null && commonName.length() >= identifier.length() && commonName.length() > bestMatch.length()) {
|
if (commonName != null && commonName.length() >= identifier.length() && commonName.length() > bestMatch.length()) {
|
||||||
bestMatch = commonName;
|
bestMatch = commonName;
|
||||||
|
@ -373,6 +416,38 @@ public class MediaDetection {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static List<SearchResult> matchSeriesFromStringWithoutSpacing(Collection<String> names, boolean strict) throws IOException {
|
||||||
|
// clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail
|
||||||
|
Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+");
|
||||||
|
|
||||||
|
List<String> terms = new ArrayList<String>(names.size());
|
||||||
|
for (String it : names) {
|
||||||
|
String term = spacing.matcher(it).replaceAll("").toLowerCase();
|
||||||
|
if (term.length() >= 3) {
|
||||||
|
terms.add(term); // only consider words, not just random letters
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// similarity threshold based on strict/non-strict
|
||||||
|
SimilarityMetric metric = new NameSimilarityMetric();
|
||||||
|
float similarityThreshold = strict ? 0.75f : 0.5f;
|
||||||
|
|
||||||
|
List<SearchResult> seriesList = new ArrayList<SearchResult>();
|
||||||
|
for (Entry<String, SearchResult> it : getSeriesIndex()) {
|
||||||
|
String name = spacing.matcher(it.getKey()).replaceAll("").toLowerCase();
|
||||||
|
for (String term : terms) {
|
||||||
|
if (term.contains(name)) {
|
||||||
|
if (metric.getSimilarity(term, name) >= similarityThreshold) {
|
||||||
|
seriesList.add(it.getValue());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return seriesList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static Collection<Movie> detectMovie(File movieFile, MovieIdentificationService hashLookupService, MovieIdentificationService queryLookupService, Locale locale, boolean strict) throws Exception {
|
public static Collection<Movie> detectMovie(File movieFile, MovieIdentificationService hashLookupService, MovieIdentificationService queryLookupService, Locale locale, boolean strict) throws Exception {
|
||||||
Set<Movie> options = new LinkedHashSet<Movie>();
|
Set<Movie> options = new LinkedHashSet<Movie>();
|
||||||
|
|
||||||
|
@ -563,15 +638,13 @@ public class MediaDetection {
|
||||||
return matches != null && matches.size() > 0 ? matches.get(0) : null;
|
return matches != null && matches.size() > 0 ? matches.get(0) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Entry<String, Movie>> movieIndex;
|
private static List<Entry<String, Movie>> movieIndex = new ArrayList<Entry<String, Movie>>(100000);
|
||||||
|
|
||||||
|
|
||||||
private static synchronized List<Entry<String, Movie>> getMovieIndex() throws IOException {
|
public static synchronized List<Entry<String, Movie>> getMovieIndex() throws IOException {
|
||||||
if (movieIndex == null) {
|
if (movieIndex.isEmpty()) {
|
||||||
try {
|
try {
|
||||||
Movie[] movies = releaseInfo.getMovieList();
|
for (Movie movie : releaseInfo.getMovieList()) {
|
||||||
movieIndex = new ArrayList<Entry<String, Movie>>(movies.length);
|
|
||||||
for (Movie movie : movies) {
|
|
||||||
movieIndex.add(new SimpleEntry<String, Movie>(normalizePunctuation(movie.getName()).toLowerCase(), movie));
|
movieIndex.add(new SimpleEntry<String, Movie>(normalizePunctuation(movie.getName()).toLowerCase(), movie));
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -7,13 +7,11 @@ import static java.util.Collections.*;
|
||||||
import static java.util.ResourceBundle.*;
|
import static java.util.ResourceBundle.*;
|
||||||
import static java.util.regex.Pattern.*;
|
import static java.util.regex.Pattern.*;
|
||||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
import static net.sourceforge.tuned.FileUtilities.*;
|
|
||||||
import static net.sourceforge.tuned.StringUtilities.*;
|
import static net.sourceforge.tuned.StringUtilities.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileFilter;
|
import java.io.FileFilter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.text.Collator;
|
import java.text.Collator;
|
||||||
|
@ -238,11 +236,6 @@ public class ReleaseInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String[] getSeriesList() throws IOException {
|
|
||||||
return seriesListResource.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public TheTVDBSearchResult[] getTheTVDBIndex() throws IOException {
|
public TheTVDBSearchResult[] getTheTVDBIndex() throws IOException {
|
||||||
return tvdbIndexResource.get();
|
return tvdbIndexResource.get();
|
||||||
}
|
}
|
||||||
|
@ -279,7 +272,6 @@ public class ReleaseInfo {
|
||||||
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
||||||
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
|
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
|
||||||
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
||||||
protected final CachedResource<String[]> seriesListResource = new SeriesListResource(getBundle(getClass().getName()).getString("url.series-list"));
|
|
||||||
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getBundle(getClass().getName()).getString("url.series-mappings"));
|
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getBundle(getClass().getName()).getString("url.series-mappings"));
|
||||||
protected final CachedResource<TheTVDBSearchResult[]> tvdbIndexResource = new TheTVDBIndexResource(getBundle(getClass().getName()).getString("url.thetvdb-index"));
|
protected final CachedResource<TheTVDBSearchResult[]> tvdbIndexResource = new TheTVDBIndexResource(getBundle(getClass().getName()).getString("url.thetvdb-index"));
|
||||||
protected final CachedResource<AnidbSearchResult[]> anidbIndexResource = new AnidbIndexResource(getBundle(getClass().getName()).getString("url.anidb-index"));
|
protected final CachedResource<AnidbSearchResult[]> anidbIndexResource = new AnidbIndexResource(getBundle(getClass().getName()).getString("url.anidb-index"));
|
||||||
|
@ -323,20 +315,6 @@ public class ReleaseInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected static class SeriesListResource extends CachedResource<String[]> {
|
|
||||||
|
|
||||||
public SeriesListResource(String resource) {
|
|
||||||
super(resource, String[].class, 7 * 24 * 60 * 60 * 1000); // check for updates once a week
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String[] process(ByteBuffer data) throws IOException {
|
|
||||||
return readAll(new InputStreamReader(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8")).split("\\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected static class TheTVDBIndexResource extends CachedResource<TheTVDBSearchResult[]> {
|
protected static class TheTVDBIndexResource extends CachedResource<TheTVDBSearchResult[]> {
|
||||||
|
|
||||||
public TheTVDBIndexResource(String resource) {
|
public TheTVDBIndexResource(String resource) {
|
||||||
|
|
|
@ -19,9 +19,6 @@ url.series-mappings: http://filebot.net/data/series-mappings.txt
|
||||||
# list of all movies (id, name, year)
|
# list of all movies (id, name, year)
|
||||||
url.movie-list: http://filebot.net/data/movies.txt.gz
|
url.movie-list: http://filebot.net/data/movies.txt.gz
|
||||||
|
|
||||||
# list of tv show and anime names
|
|
||||||
url.series-list: http://filebot.net/data/series.list.gz
|
|
||||||
|
|
||||||
# TheTVDB index
|
# TheTVDB index
|
||||||
url.thetvdb-index: http://filebot.net/data/thetvdb.txt.gz
|
url.thetvdb-index: http://filebot.net/data/thetvdb.txt.gz
|
||||||
|
|
||||||
|
|
|
@ -90,7 +90,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Set<String> getFields(AnidbSearchResult anime) {
|
protected Set<String> getFields(AnidbSearchResult anime) {
|
||||||
return set(anime.getPrimaryTitle(), anime.getOfficialTitle("en"));
|
return set(anime.getPrimaryTitle(), anime.getEnglishTitle());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -263,6 +263,11 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String getEnglishTitle() {
|
||||||
|
return officialTitle != null ? officialTitle.get("en") : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public String getOfficialTitle(String key) {
|
public String getOfficialTitle(String key) {
|
||||||
return officialTitle != null ? officialTitle.get(key) : null;
|
return officialTitle != null ? officialTitle.get(key) : null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -793,6 +793,7 @@ iNVANDRAREN
|
||||||
iON
|
iON
|
||||||
iRB
|
iRB
|
||||||
iRoNiCs
|
iRoNiCs
|
||||||
|
iSD
|
||||||
iSG
|
iSG
|
||||||
iSRAELiTE
|
iSRAELiTE
|
||||||
iTA
|
iTA
|
||||||
|
|
Loading…
Reference in New Issue