* fine-tune subtitle auto-selection

This commit is contained in:
Reinhard Pointner 2015-05-24 22:54:56 +00:00
parent 3e4da0f254
commit 1c928e5592
3 changed files with 181 additions and 22 deletions

View File

@ -0,0 +1,156 @@
package net.filebot.subtitle;
import static java.util.Collections.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.similarity.EpisodeMetrics.*;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.filebot.mediainfo.MediaInfo;
import net.filebot.mediainfo.MediaInfo.StreamKind;
import net.filebot.similarity.CrossPropertyMetric;
import net.filebot.similarity.EpisodeMetrics;
import net.filebot.similarity.MetricAvg;
import net.filebot.similarity.MetricCascade;
import net.filebot.similarity.NameSimilarityMetric;
import net.filebot.similarity.NumericSimilarityMetric;
import net.filebot.similarity.SimilarityMetric;
import net.filebot.web.OpenSubtitlesSubtitleDescriptor;
import net.filebot.web.SubtitleDescriptor;
/**
 * Similarity metrics used for matching subtitle descriptors against video files.
 *
 * Each constant wraps an inner {@link SimilarityMetric} and delegates {@link #getSimilarity(Object, Object)} to it.
 */
public enum SubtitleMetrics implements SimilarityMetric {

    // subtitle verification metric specifically excluding SxE mismatches
    AbsoluteSeasonEpisode(new SimilarityMetric() {

        @Override
        public float getSimilarity(Object o1, Object o2) {
            float f = SeasonEpisode.getSimilarity(o1, o2);

            // neutral result if SeasonEpisode yields 0 and getEpisodeIdentifier is null for both names or for neither
            if (f == 0 && (getEpisodeIdentifier(o1.toString(), true) == null) == (getEpisodeIdentifier(o2.toString(), true) == null)) {
                return 0;
            }

            // anything below a perfect score is reported as a negative match
            return f < 1 ? -1 : 1;
        }
    }),

    // compares the number that follows CD / DISK in the string form of both objects
    DiskNumber(new NumericSimilarityMetric() {

        private final Pattern CDNO = Pattern.compile("(?:CD|DISK)(\\d+)", Pattern.CASE_INSENSITIVE);

        @Override
        public float getSimilarity(Object o1, Object o2) {
            int c1 = getDiskNumber(o1);
            int c2 = getDiskNumber(o2);

            if (c1 == 0 && c2 == 0) // undefined
                return 0;

            return c1 == c2 ? 1 : -1; // positive or negative match
        }

        /**
         * Extracts the disk number from the string form of the given object.
         *
         * @param o object whose {@code toString()} is searched for the CD / DISK pattern
         * @return the number of the last parsable match, or 0 if there is none
         */
        public int getDiskNumber(Object o) {
            int cd = 0;
            Matcher matcher = CDNO.matcher(o.toString());
            while (matcher.find()) {
                try {
                    cd = Integer.parseInt(matcher.group(1));
                } catch (NumberFormatException ignored) {
                    // digit run does not fit into an int (e.g. "abcd12345678901"), so it is skipped
                    // instead of letting the exception escape from the metric
                }
            }
            return cd;
        }
    }),

    // compares FPS and duration (in seconds) of a subtitle descriptor with those of a video file
    VideoProperties(new CrossPropertyMetric() {

        private final String FPS = "FPS";
        private final String SECONDS = "SECS";

        @Override
        public float getSimilarity(Object o1, Object o2) {
            return o1 instanceof SubtitleDescriptor ? super.getSimilarity(o1, o2) : super.getSimilarity(o2, o1); // make sure that SubtitleDescriptor is o1
        }

        // NOTE(review): presumably the property hook that CrossPropertyMetric.getSimilarity calls — confirm against the superclass
        protected Map<String, Object> getProperties(Object object) {
            if (object instanceof OpenSubtitlesSubtitleDescriptor) {
                return getSubtitleProperties((OpenSubtitlesSubtitleDescriptor) object);
            } else if (object instanceof File) {
                return getVideoProperties((File) object);
            }
            return emptyMap();
        }

        /**
         * Reads FPS and duration from the subtitle descriptor.
         *
         * Each property is read independently, so that one missing or malformed value does not discard the other.
         */
        private Map<String, Object> getSubtitleProperties(OpenSubtitlesSubtitleDescriptor subtitle) {
            Map<String, Object> props = new HashMap<String, Object>();

            try {
                float fps = Math.round(subtitle.getMovieFPS()); // round because most FPS values in the database are bad anyway
                if (fps > 0) {
                    props.put(FPS, fps);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }

            try {
                long seconds = (long) Math.floor(subtitle.getMovieTimeMS() / (double) 1000);
                if (seconds > 0) {
                    props.put(SECONDS, seconds);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }

            return props;
        }

        // per-file cache of video properties; all access is guarded by synchronizing on the map itself
        private final Map<File, Map<String, Object>> mediaInfoCache = new WeakHashMap<File, Map<String, Object>>(64);

        /**
         * Reads FPS and duration from the given video file via MediaInfo and caches the result per file.
         *
         * Each property is parsed independently, so that one missing or malformed value does not discard the other.
         * Failures are not reported; the (possibly empty) result is cached either way.
         */
        private Map<String, Object> getVideoProperties(File file) {
            synchronized (mediaInfoCache) {
                return mediaInfoCache.computeIfAbsent(file, (f) -> {
                    Map<String, Object> props = new HashMap<String, Object>();
                    try {
                        // NOTE(review): the MediaInfo instance is never explicitly released here — confirm whether it needs closing
                        MediaInfo mediaInfo = new MediaInfo();
                        if (mediaInfo.open(f)) {
                            try {
                                float fps = Math.round(Float.parseFloat(mediaInfo.get(StreamKind.Video, 0, "FrameRate")));
                                if (fps > 0) {
                                    props.put(FPS, fps);
                                }
                            } catch (Exception ignored) {
                                // frame rate missing or not parsable => FPS stays undefined
                            }

                            try {
                                long seconds = (long) Math.floor(Long.parseLong(mediaInfo.get(StreamKind.Video, 0, "Duration")) / (double) 1000);
                                if (seconds > 0) {
                                    props.put(SECONDS, seconds);
                                }
                            } catch (Exception ignored) {
                                // duration missing or not parsable => SECS stays undefined
                            }
                        }
                    } catch (Exception ignored) {
                        // media info could not be read for this file => no properties
                    }
                    return props;
                });
            }
        }
    });

    // inner metric
    private final SimilarityMetric metric;

    private SubtitleMetrics(SimilarityMetric metric) {
        this.metric = metric;
    }

    /**
     * Delegates to the inner metric of this constant.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        return metric.getSimilarity(o1, o2);
    }

    /**
     * @return a new array holding the sequence of metrics used for generic media / subtitle matching
     */
    public static SimilarityMetric[] defaultSequence() {
        return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, FileName, DiskNumber, VideoProperties, new NameSimilarityMetric() };
    }

    /**
     * @return the verification metric provided by {@link EpisodeMetrics#verificationMetric()}
     */
    public static SimilarityMetric verificationMetric() {
        return EpisodeMetrics.verificationMetric();
    }

    /**
     * @return a new cascade of {@link #AbsoluteSeasonEpisode}, AirDate, the average of NameSubstringSequence and Name, and the movie match metric
     */
    public static SimilarityMetric sanityMetric() {
        return new MetricCascade(AbsoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name), getMovieMatchMetric());
    }
}

View File

@ -4,7 +4,6 @@ import static java.lang.Math.*;
import static java.util.Collections.*;
import static net.filebot.MediaTypes.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.similarity.EpisodeMetrics.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.FileUtilities.*;
@ -35,7 +34,6 @@ import net.filebot.similarity.EpisodeMetrics;
import net.filebot.similarity.Match;
import net.filebot.similarity.Matcher;
import net.filebot.similarity.MetricAvg;
import net.filebot.similarity.MetricCascade;
import net.filebot.similarity.NameSimilarityMetric;
import net.filebot.similarity.SequenceMatchSimilarity;
import net.filebot.similarity.SimilarityMetric;
@ -139,11 +137,11 @@ public final class SubtitleUtilities {
}
// add other possible matches to the options
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
SimilarityMetric sanity = SubtitleMetrics.verificationMetric();
float minMatchSimilarity = strict ? 0.9f : 0.6f;
// first match everything as best as possible, then filter possibly bad matches
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, false).entrySet()) {
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles).entrySet()) {
if (sanity.getSimilarity(it.getKey(), it.getValue()) >= minMatchSimilarity) {
subtitlesByFile.get(it.getKey()).add(it.getValue());
}
@ -178,31 +176,20 @@ public final class SubtitleUtilities {
return subtitlesByFile;
}
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles) throws InterruptedException {
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
// optimize for generic media <-> subtitle matching
SimilarityMetric[] metrics = new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, new NameSimilarityMetric() };
// subtitle verification metric specifically excluding SxE mismatches
SimilarityMetric absoluteSeasonEpisode = new SimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
float f = SeasonEpisode.getSimilarity(o1, o2);
if (f == 0 && (getEpisodeIdentifier(o1.toString(), true) == null) == (getEpisodeIdentifier(o2.toString(), true) == null)) {
return 0;
}
return f < 1 ? -1 : 1;
}
};
SimilarityMetric sanity = new MetricCascade(absoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name), getMovieMatchMetric());
SimilarityMetric[] metrics = SubtitleMetrics.defaultSequence();
// first match everything as best as possible, then filter possibly bad matches
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
SimilarityMetric sanity = SubtitleMetrics.sanityMetric();
float minSanitySimilarity = 0.1f;
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.6f)) {
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= minSanitySimilarity) {
subtitleByVideo.put(it.getValue(), it.getCandidate());
}
}
@ -250,7 +237,7 @@ public final class SubtitleUtilities {
}
try {
return matchSubtitles(singleton(file), subtitles, strict).entrySet().iterator().next().getValue();
return matchSubtitles(singleton(file), subtitles).entrySet().iterator().next().getValue();
} catch (NoSuchElementException e) {
return null;
} catch (InterruptedException e) {

View File

@ -96,6 +96,22 @@ public class OpenSubtitlesSubtitleDescriptor implements SubtitleDescriptor, Seri
return Integer.parseInt(getProperty(Property.QueryNumber));
}
/**
 * Parses the {@code MovieFPS} property of this descriptor as a float.
 *
 * @return the parsed property value
 * @throws NumberFormatException if the property text is not a parsable float
 * @throws NullPointerException if {@code getProperty} yields null
 */
public float getMovieFPS() {
    String movieFps = getProperty(Property.MovieFPS);
    return Float.parseFloat(movieFps);
}
/**
 * Parses the {@code MovieTimeMS} property of this descriptor as a long.
 *
 * @return the parsed property value
 * @throws NumberFormatException if the property text is null or not a parsable long
 */
public long getMovieTimeMS() {
    String movieTime = getProperty(Property.MovieTimeMS);
    return Long.parseLong(movieTime);
}
/**
 * Parses the {@code SubActualCD} property of this descriptor as an int.
 *
 * @return the parsed property value
 * @throws NumberFormatException if the property text is null or not a parsable int
 */
public int getSubActualCD() {
    String actualCd = getProperty(Property.SubActualCD);
    return Integer.parseInt(actualCd);
}
/**
 * Parses the {@code SubSumCD} property of this descriptor as an int.
 *
 * @return the parsed property value
 * @throws NumberFormatException if the property text is null or not a parsable int
 */
public int getSubSumCD() {
    String sumCd = getProperty(Property.SubSumCD);
    return Integer.parseInt(sumCd);
}
@Override
public ByteBuffer fetch() throws Exception {
URL resource = new URL(getProperty(Property.SubDownloadLink));