* lots of fine-tuning and adjustments for fully-automated movie detection and episode matching

@see http://www.filebot.net/forums/viewtopic.php?f=4&t=832&p=5360#p5360
This commit is contained in:
Reinhard Pointner 2013-07-23 19:06:49 +00:00
parent 6b870fa1bc
commit 4581f2221f
8 changed files with 68 additions and 17 deletions

View File

@ -897,7 +897,7 @@ public class CmdlineOperations implements CmdlineInterface {
}
// return first and only value
return probableMatches;
return probableMatches.size() <= 5 ? probableMatches : probableMatches.subList(0, 5);
}

View File

@ -45,6 +45,7 @@ import net.sourceforge.filebot.similarity.DateMatcher;
import net.sourceforge.filebot.similarity.DateMetric;
import net.sourceforge.filebot.similarity.MetricAvg;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.NumericSimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SeasonEpisodePattern;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
@ -589,18 +590,35 @@ public class MediaDetection {
}
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms) throws IOException {
List<String> paragon = stripReleaseInfo(terms, true);
SimilarityMetric metric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new NameSimilarityMetric() {
private static SimilarityMetric getMovieMatchMetric() {
return new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new SequenceMatchSimilarity(0, true), new NumericSimilarityMetric() {
private Pattern year = Pattern.compile("\\b\\d{4}\\b");
@Override
protected String normalize(Object object) {
return super.normalize(stripReleaseInfo(object.toString()).replaceAll("\\D+", " ")); // similarity of number patterns
Matcher ym = year.matcher(object.toString());
StringBuilder sb = new StringBuilder();
while (ym.find()) {
sb.append(ym.group()).append(' ');
}
return sb.toString().trim();
}
@Override
public float getSimilarity(Object o1, Object o2) {
return super.getSimilarity(o1, o2) * 2; // DOUBLE WEIGHT FOR YEAR MATCH
}
});
}
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms) throws IOException {
List<String> paragon = stripReleaseInfo(terms, true);
List<T> sorted = new ArrayList<T>(options);
sort(sorted, new SimilarityComparator(metric, paragon.toArray()));
sort(sorted, new SimilarityComparator(getMovieMatchMetric(), paragon.toArray()));
return sorted;
}
@ -801,8 +819,8 @@ public class MediaDetection {
// DEBUG
// System.out.format("Query %s: %s%n", queryLookupService.getName(), querySet);
final SimilarityMetric metric = new NameSimilarityMetric();
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
final SimilarityMetric metric = getMovieMatchMetric();
for (String query : querySet) {
for (Movie movie : queryLookupService.searchMovie(query.toLowerCase(), locale)) {
probabilityMap.put(movie, metric.getSimilarity(query, movie));

View File

@ -57,12 +57,8 @@ public class EpisodeMatcher extends Matcher<File, Object> {
if (uniqueFiles.equals(uniqueEpisodes)) {
Episode[] episodes = episodeSets.get(file).toArray(new Episode[0]);
Set<String> seriesNames = new HashSet<String>();
for (Episode ep : episodes) {
seriesNames.add(ep.getSeriesName());
}
if (seriesNames.size() == 1) {
if (isMultiEpisode(episodes)) {
MultiEpisode episode = new MultiEpisode(episodes);
disjointMatchCollection.add(new Match<File, Object>(file, episode));
modified = true;
@ -99,4 +95,27 @@ public class EpisodeMatcher extends Matcher<File, Object> {
return result;
}
/**
 * Decides whether the given episodes may be combined into a single multi-episode match.
 * 
 * Two conditions must hold, checked in array order:
 * <ol>
 * <li>the episode numbers are consecutive (each number is exactly the previous number plus one)</li>
 * <li>all episodes report the same series name</li>
 * </ol>
 * 
 * An empty array or a single episode passes both checks and yields {@code true}.
 * 
 * @param episodes
 *            the candidate episodes, in the order in which they should form a sequence
 * @return {@code true} if the episodes form a consecutive run within one series, {@code false} otherwise
 */
private boolean isMultiEpisode(Episode[] episodes) {
	// check episode sequence integrity
	Integer seqIndex = null;
	for (Episode ep : episodes) {
		Integer number = ep.getEpisode();

		// compare from the side that is known to be non-null, so that an episode without a number
		// (should getEpisode() ever return null) breaks the sequence instead of raising a NullPointerException
		if (seqIndex != null && !Integer.valueOf(seqIndex + 1).equals(number)) {
			return false;
		}
		seqIndex = number;
	}

	// check drill-down integrity
	String seriesName = null;
	for (Episode ep : episodes) {
		if (seriesName != null && !seriesName.equals(ep.getSeriesName())) {
			return false;
		}
		seriesName = ep.getSeriesName();
	}

	return true;
}
}

View File

@ -503,7 +503,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
if (seriesInfo != null) {
if (seriesInfo.getRatingCount() > 0) {
float rating = max(0, seriesInfo.getRating().floatValue());
return seriesInfo.getRatingCount() >= 15 ? rating : rating / 2; // PENALIZE SHOWS WITH FEW RATINGS
return seriesInfo.getRatingCount() >= 15 ? rating : 0; // PENALIZE SHOWS WITH FEW RATINGS
} else {
return -1; // BIG PENALTY FOR SHOWS WITH 0 RATINGS
}

View File

@ -11,7 +11,17 @@ import java.util.Locale;
public class SequenceMatchSimilarity implements SimilarityMetric {
private final CommonSequenceMatcher commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(Locale.ROOT), 10, false);
private final CommonSequenceMatcher commonSequenceMatcher;
/**
 * Creates a metric with the default matcher settings by delegating to the two-argument constructor with a
 * maximum start index of 10 and returnFirstMatch set to false.
 */
public SequenceMatchSimilarity() {
this(10, false);
}
/**
 * Creates a metric backed by a CommonSequenceMatcher that uses the lenient collator for Locale.ROOT.
 * 
 * @param commonSequenceMaxStartIndex
 *            passed through to the CommonSequenceMatcher; presumably the highest index at which a common
 *            sequence may begin -- confirm against CommonSequenceMatcher
 * @param returnFirstMatch
 *            passed through to the CommonSequenceMatcher; presumably makes it stop at the first common
 *            sequence found -- confirm against CommonSequenceMatcher
 */
public SequenceMatchSimilarity(int commonSequenceMaxStartIndex, boolean returnFirstMatch) {
this.commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(Locale.ROOT), commonSequenceMaxStartIndex, returnFirstMatch);
}
@Override

View File

@ -92,6 +92,7 @@ A.Release.Lounge
ABC
AC3D
Addic7ed.com
AMC-TEST
Anaglyph
Anime[s]?
Arte
@ -142,6 +143,7 @@ Extended.Version
ExtraScene
ExtraTorrent
Fantasy
FileBot
Final.Cut
FIXED
Fra
@ -244,6 +246,7 @@ tt\d{2,8}
TVOON.DE
tvp
tvrecorder
UE
ultimate.edition
UNCUT
unrated

View File

@ -543,6 +543,7 @@ DeTvaVe
DEViSE
DEWSTRR
DFA
DFE
DFQ
DFTU
DGAS

View File

@ -145,10 +145,10 @@ def groups = input.groupBy{ f ->
def mn = norm(mov.name)
// S00E00 | 2012.07.21 | One Piece 217 | Firefly - Serenity | [Taken 1, Taken 2, Taken 3, Taken 4, ..., Taken 10]
if ((parseEpisodeNumber(fn, true) || parseDate(fn) || ([dn, fn].find{ it =~ sn && matchMovie(it, true) == null } && (parseEpisodeNumber(fn.after(sn), false) || fn.after(sn) =~ /\d{1,2}\D+\d{1,2}/) && matchMovie(fn, true) == null) || (fn.after(sn) ==~ /.{0,3} - .+/ && matchMovie(fn, true) == null) || f.dir.listFiles{ it.isVideo() && norm(it.name) =~ sn && it.name =~ /\b\d{1,3}\b/}.size() >= 10) && !tryQuietly{ def m = detectMovie(f, true); m.year >= 1950 && f.listPath().reverse().take(3).find{ it.name =~ m.year } }) {
if ((parseEpisodeNumber(fn, true) || parseDate(fn) || ([dn, fn].find{ it =~ sn && matchMovie(it, true) == null } && (parseEpisodeNumber(fn.after(sn), false) || fn.after(sn) =~ /\d{1,2}\D+\d{1,2}/) && matchMovie(fn, true) == null) || (fn.after(sn) ==~ /.{0,3} - .+/ && matchMovie(fn, true) == null) || f.dir.listFiles{ it.isVideo() && norm(it.name) =~ sn && it.name =~ /\b\d{1,3}\b/}.size() >= 10) && !(mov.year >= 1950 && f.listPath().reverse().take(3).find{ it.name =~ mov.year }) || mov.year < 1900) {
_log.fine("Exclude Movie: $mov")
mov = null
} else if (mn ==~ fn || (detectMovie(f, true) && [dn, fn].find{ it =~ /(19|20)\d{2}/ }) || [dn, fn].find{ it =~ mn && !(it.after(mn) =~ /\b\d{1,3}\b/) && !(it.before(mn).contains(sn)) }) {
} else if (mn ==~ fn || [dn, fn].find{ it =~ /\b/+mov.year+/\b/ } || [dn, fn].find{ it =~ mn && !(it.after(mn) =~ /\b\d{1,3}\b/) && !(it.before(mn).contains(sn)) } || (detectMovie(f, true) && [dn, fn].find{ it =~ /(19|20)\d{2}/ })) {
_log.fine("Exclude Series: $tvs")
tvs = null
}