From 81d9b6a2f66be24469cd0d24118d4d787f0428ca Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 8 Sep 2016 10:58:10 +0800 Subject: [PATCH] Improved movie grouping for subtitle files --- .../net/filebot/format/MediaBindingBean.java | 15 +++--- source/net/filebot/media/MediaDetection.java | 11 +++++ .../filebot/subtitle/SubtitleUtilities.java | 48 ++++++++++++------- 3 files changed, 49 insertions(+), 25 deletions(-) diff --git a/source/net/filebot/format/MediaBindingBean.java b/source/net/filebot/format/MediaBindingBean.java index 9b29e8bf..e5963e1d 100644 --- a/source/net/filebot/format/MediaBindingBean.java +++ b/source/net/filebot/format/MediaBindingBean.java @@ -558,17 +558,18 @@ public class MediaBindingBean { @Define("lang") public Language getLanguageTag() throws Exception { - Locale languageSuffix = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile())); - if (languageSuffix != null) { - return Language.getLanguage(languageSuffix); + // grep language from filename + Locale languageTag = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile())); + if (languageTag != null) { + return Language.getLanguage(languageTag); } - // try to auto-detect subtitle language + // detect language from subtitle text content if (SUBTITLE_FILES.accept(getMediaFile())) { try { - return Language.getLanguage(detectSubtitleLanguage(getMediaFile())); - } catch (Throwable e) { - throw new RuntimeException("Failed to auto-detect subtitle language: " + e, e); + return detectSubtitleLanguage(getMediaFile()); + } catch (Exception e) { + throw new RuntimeException("Failed to detect subtitle language: " + e, e); } } diff --git a/source/net/filebot/media/MediaDetection.java b/source/net/filebot/media/MediaDetection.java index 81d8261c..d3076b39 100644 --- a/source/net/filebot/media/MediaDetection.java +++ b/source/net/filebot/media/MediaDetection.java @@ -9,6 +9,7 @@ import static net.filebot.MediaTypes.*; import static net.filebot.media.XattrMetaInfo.*; import static net.filebot.similarity.CommonSequenceMatcher.*; import static net.filebot.similarity.Normalization.*; +import static net.filebot.subtitle.SubtitleUtilities.*; import static net.filebot.util.FileUtilities.*; import static net.filebot.util.RegularExpressions.*; import static net.filebot.util.StringUtilities.*; @@ -43,6 +44,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import net.filebot.ApplicationFolder; +import net.filebot.Language; import net.filebot.Resource; import net.filebot.WebServices; import net.filebot.archive.Archive; @@ -1107,6 +1109,15 @@ public class MediaDetection { } catch (Exception e) { debug.warning(format("Failed to read media characteristics: %s", e.getMessage())); } + } else if (SUBTITLE_FILES.accept(f) && f.length() > ONE_KILOBYTE) { + try { + Language language = detectSubtitleLanguage(f); + if (language != null) { + return asList(language.getCode()); + } + } catch (Exception e) { + debug.warning(format("Failed to detect subtitle language: %s", e.getMessage())); + } } return emptyList(); })).forEach((group, videos) -> groups.add(videos)); diff --git a/source/net/filebot/subtitle/SubtitleUtilities.java b/source/net/filebot/subtitle/SubtitleUtilities.java index 6861aa9d..a0c88972 100644 --- a/source/net/filebot/subtitle/SubtitleUtilities.java +++ b/source/net/filebot/subtitle/SubtitleUtilities.java @@ -33,6 +33,15 @@ import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; +import com.optimaize.langdetect.DetectedLanguage; +import com.optimaize.langdetect.LanguageDetector; +import com.optimaize.langdetect.LanguageDetectorBuilder; +import com.optimaize.langdetect.i18n.LdLocale; +import com.optimaize.langdetect.ngram.NgramExtractors; +import com.optimaize.langdetect.profiles.BuiltInLanguages; +import com.optimaize.langdetect.profiles.LanguageProfile; +import com.optimaize.langdetect.profiles.LanguageProfileReader; + import net.filebot.Language; import net.filebot.similarity.EpisodeMetrics; import net.filebot.similarity.Match; @@ -53,15 +62,6 @@ import net.filebot.web.SubtitleProvider; import net.filebot.web.SubtitleSearchResult; import net.filebot.web.VideoHashSubtitleService; -import com.optimaize.langdetect.DetectedLanguage; -import com.optimaize.langdetect.LanguageDetector; -import com.optimaize.langdetect.LanguageDetectorBuilder; -import com.optimaize.langdetect.i18n.LdLocale; -import com.optimaize.langdetect.ngram.NgramExtractors; -import com.optimaize.langdetect.profiles.BuiltInLanguages; -import com.optimaize.langdetect.profiles.LanguageProfile; -import com.optimaize.langdetect.profiles.LanguageProfileReader; - public final class SubtitleUtilities { public static Map> lookupSubtitlesByHash(VideoHashSubtitleService service, Collection files, String languageName, boolean addOptions, boolean strict) throws Exception { @@ -437,19 +437,31 @@ public final class SubtitleUtilities { return new MemoryFile(descriptor.getPath(), data); } - public static String detectSubtitleLanguage(File file) throws IOException { - MemoryFile subtitleFile = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file))); - String subtitleText = decodeSubtitles(subtitleFile).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n")); - - // detect language - List probabilities = createLanguageDetector().getProbabilities(subtitleText); - - if (probabilities.size() > 0) { - return probabilities.get(0).getLocale().getLanguage(); + public static Language detectSubtitleLanguage(File file) throws IOException { + // grep language from filename + Locale languageTag = releaseInfo.getSubtitleLanguageTag(getName(file)); + if (languageTag != null) { + return Language.getLanguage(languageTag); } + + // detect language from subtitle text content + MemoryFile data = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file))); + List options = detectSubtitleLanguage(data); + if (options.size() > 0) { + return Language.getLanguage(options.get(0).getLocale().getLanguage()); + } + return null; } + public static List detectSubtitleLanguage(MemoryFile file) throws IOException { + // decode subtitles + String text = decodeSubtitles(file).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n")); + + // detect text language + return createLanguageDetector().getProbabilities(text); + } + private static LanguageDetectorBuilder languageDetector; private static LanguageDetector createLanguageDetector() throws IOException {