+ subtitle language auto-detection for {lang} binding

This commit is contained in:
Reinhard Pointner 2016-01-31 16:13:04 +00:00
parent 5bf402a5b5
commit 9f2b63121f
8 changed files with 86 additions and 34 deletions

View File

@ -30,5 +30,7 @@
<classpathentry kind="lib" path="lib/ivy/jar/sevenzipjbinding.jar"/>
<classpathentry kind="lib" path="lib/ivy/bundle/json-io.jar"/>
<classpathentry kind="lib" path="lib/ivy/jar/jna-platform.jar"/>
<classpathentry kind="lib" path="lib/ivy/jar/language-detector.jar"/>
<classpathentry kind="lib" path="lib/ivy/bundle/guava.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

View File

@ -183,6 +183,15 @@
<include name="com/github/junrar/**" />
</zipfileset>
<zipfileset src="${dir.lib}/ivy/jar/language-detector.jar">
<include name="com/**" />
<include name="languages/**" />
</zipfileset>
<zipfileset src="${dir.lib}/ivy/bundle/guava.jar">
<include name="com/google/**" />
</zipfileset>
<!-- include classes and native libraries -->
<zipfileset src="${dir.lib}/ivy/jar/jna.jar">
<include name="com/sun/jna/**" />

View File

@ -23,6 +23,7 @@
<dependency org="com.fifesoft" name="rsyntaxtextarea" rev="2.5.8" />
<dependency org="net.sf.sevenzipjbinding" name="sevenzipjbinding" rev="9.20-2.00beta" />
<dependency org="net.sf.sevenzipjbinding" name="sevenzipjbinding-all-platforms" rev="9.20-2.00beta" />
<dependency org="com.optimaize.languagedetector" name="language-detector" rev="0.5" />
<!-- FileBot Scripting -->
<dependency org="org.apache.ant" name="ant" rev="1.9.6" />

View File

@ -91,9 +91,12 @@ public class Language implements Serializable {
};
public static Language getLanguage(String code) {
ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName());
if (code == null || code.isEmpty()) {
return null;
}
try {
ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName());
String[] values = bundle.getString(code).split("\\t", 3);
return new Language(code, values[0], values[1], values[2].split("\\t"));
} catch (Exception e) {

View File

@ -8,6 +8,7 @@ import static net.filebot.format.ExpressionFormatMethods.*;
import static net.filebot.hash.VerificationUtilities.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.subtitle.SubtitleUtilities.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import static net.filebot.web.EpisodeFormat.*;
@ -32,7 +33,6 @@ import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import net.filebot.Cache;
import net.filebot.Language;
import net.filebot.MediaTypes;
import net.filebot.MetaAttributeView;
@ -512,14 +512,19 @@ public class MediaBindingBean {
}
@Define("lang")
public Language detectSubtitleLanguage() throws Exception {
public Language getSubtitleLanguage() throws Exception {
Locale languageSuffix = releaseInfo.getLanguageSuffix(FileUtilities.getName(getMediaFile()));
if (languageSuffix != null)
if (languageSuffix != null) {
return Language.getLanguage(languageSuffix);
}
// require subtitle file
if (!SUBTITLE_FILES.accept(getMediaFile())) {
return null;
// try to auto-detect subtitle language
if (SUBTITLE_FILES.accept(getMediaFile())) {
try {
return Language.getLanguage(detectSubtitleLanguage(getMediaFile()));
} catch (Throwable e) {
throw new RuntimeException("Failed to auto-detect subtitle language: " + e, e);
}
}
return null;
@ -1024,21 +1029,6 @@ public class MediaBindingBean {
return bindings;
}
/**
 * Returns the checksum of the given file as computed with {@code HashType.SFV},
 * reusing a value stored in the {@code Cache.EPHEMERAL} cache when one is present.
 *
 * @param file the file to checksum; also used as the cache key
 * @return the cached or freshly computed checksum
 */
private String crc32(File file) throws IOException, InterruptedException {
    Cache checksumCache = Cache.getCache(Cache.EPHEMERAL);

    String checksum = checksumCache.get(file, String.class);
    if (checksum == null) {
        // nothing cached for this file yet: compute the checksum and remember it
        checksum = computeHash(file, HashType.SFV);
        checksumCache.put(file, checksum);
    }

    return checksum;
}
private String getOriginalFileName(File file) {
try {
return getNameWithoutExtension(new MetaAttributes(file).getOriginalName());

View File

@ -10,6 +10,8 @@ import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.filebot.Cache;
public final class VerificationUtilities {
/**
@ -103,6 +105,21 @@ public final class VerificationUtilities {
return hash.digest();
}
/**
 * Looks up the checksum of {@code file} in the cache obtained via {@code Cache.EPHEMERAL}
 * and, if no entry exists, computes it with {@code HashType.SFV} and stores the result
 * under the file before returning it.
 *
 * @param file the file to checksum; also used as the cache key
 * @return the cached or freshly computed checksum
 */
public static String crc32(File file) throws IOException, InterruptedException {
    Cache ephemeral = Cache.getCache(Cache.EPHEMERAL);
    String cached = ephemeral.get(file, String.class);

    if (cached == null) {
        // cache miss: hash the file and keep the result for later lookups
        String computed = computeHash(file, HashType.SFV);
        ephemeral.put(file, computed);
        return computed;
    }

    return cached;
}
/**
* Dummy constructor to prevent instantiation.
*/

View File

@ -29,6 +29,7 @@ import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import net.filebot.Language;
@ -48,6 +49,15 @@ import net.filebot.web.SubtitleDescriptor;
import net.filebot.web.SubtitleProvider;
import net.filebot.web.SubtitleSearchResult;
import com.optimaize.langdetect.DetectedLanguage;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.BuiltInLanguages;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
public final class SubtitleUtilities {
public static Map<File, List<SubtitleDescriptor>> findSubtitleMatches(SubtitleProvider service, Collection<File> fileSet, String languageName, String forceQuery, boolean addOptions, boolean strict) throws Exception {
@ -308,9 +318,9 @@ public final class SubtitleUtilities {
SubRipWriter out = new SubRipWriter(buffer);
for (SubtitleElement it : decodeSubtitles(data)) {
if (outputTimingOffset != 0)
if (outputTimingOffset != 0) {
it = new SubtitleElement(max(0, it.getStart() + outputTimingOffset), max(0, it.getEnd() + outputTimingOffset), it.getText());
}
out.write(it);
}
@ -386,6 +396,31 @@ public final class SubtitleUtilities {
return new MemoryFile(descriptor.getPath(), data);
}
/**
 * Guesses the language of a subtitle file from its decoded text.
 *
 * The file is read into memory, decoded into subtitle elements, and the element
 * texts (joined by newlines) are passed to the language detector.
 *
 * @param file the subtitle file to analyze
 * @return the language of the first candidate reported by the detector, or
 *         {@code null} if the detector reports no candidates
 * @throws IOException if the file cannot be read
 */
public static String detectSubtitleLanguage(File file) throws IOException {
    // load the raw bytes and wrap them so that the subtitle decoder can process them
    byte[] bytes = readFile(file);
    MemoryFile data = new MemoryFile(file.getName(), ByteBuffer.wrap(bytes));

    // one line of text per subtitle element
    String text = decodeSubtitles(data).stream()
            .map(SubtitleElement::getText)
            .collect(Collectors.joining("\n"));

    // only the first candidate is used
    List<DetectedLanguage> candidates = createLanguageDetector().getProbabilities(text);
    return candidates.isEmpty() ? null : candidates.get(0).getLocale().getLanguage();
}
/**
 * Language detector shared by all calls; built on first use by
 * {@link #createLanguageDetector()} and {@code null} until then.
 */
private static LanguageDetector languageDetector;

/**
 * Returns the language detector, building it on first use.
 *
 * Only built-in profiles whose language code resolves via
 * {@link Language#getLanguage(String)} are loaded. The built detector is cached,
 * so profiles are read and the detector is constructed once rather than on every call.
 *
 * NOTE(review): assumes a built LanguageDetector can be reused across calls, as the
 * library's usage examples do — confirm. The lazy initialization is not synchronized.
 *
 * @return the shared language detector
 * @throws IOException if the built-in language profiles cannot be read
 */
private static LanguageDetector createLanguageDetector() throws IOException {
    if (languageDetector == null) {
        // restrict the built-in profiles to languages that Language.getLanguage can resolve
        List<LdLocale> languages = BuiltInLanguages.getLanguages().stream()
                .filter(lc -> Language.getLanguage(lc.getLanguage()) != null)
                .collect(Collectors.toList());
        List<LanguageProfile> languageProfiles = new LanguageProfileReader().readBuiltIn(languages);

        // assign only after a successful build, so that a failed attempt is retried on the next call
        languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(languageProfiles)
                .build();
    }
    return languageDetector;
}
/**
* Dummy constructor to prevent instantiation.
*/

View File

@ -175,16 +175,13 @@ public final class FileUtilities {
}
public static byte[] readFile(File source) throws IOException {
InputStream in = new FileInputStream(source);
try {
long size = source.length();
if (size < 0 || size > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Unable to read file: " + source);
}
long size = source.length();
if (size < 0 || size > Integer.MAX_VALUE) {
throw new IllegalArgumentException("Unable to read file: " + source);
}
try (InputStream in = new FileInputStream(source)) {
byte[] data = new byte[(int) size];
int position = 0;
int read = 0;
@ -193,8 +190,6 @@ public final class FileUtilities {
}
return data;
} finally {
in.close();
}
}