+ subtitle language auto-detection for {lang} binding
This commit is contained in:
parent
5bf402a5b5
commit
9f2b63121f
|
@ -30,5 +30,7 @@
|
|||
<classpathentry kind="lib" path="lib/ivy/jar/sevenzipjbinding.jar"/>
|
||||
<classpathentry kind="lib" path="lib/ivy/bundle/json-io.jar"/>
|
||||
<classpathentry kind="lib" path="lib/ivy/jar/jna-platform.jar"/>
|
||||
<classpathentry kind="lib" path="lib/ivy/jar/language-detector.jar"/>
|
||||
<classpathentry kind="lib" path="lib/ivy/bundle/guava.jar"/>
|
||||
<classpathentry kind="output" path="bin"/>
|
||||
</classpath>
|
||||
|
|
|
@ -183,6 +183,15 @@
|
|||
<include name="com/github/junrar/**" />
|
||||
</zipfileset>
|
||||
|
||||
<zipfileset src="${dir.lib}/ivy/jar/language-detector.jar">
|
||||
<include name="com/**" />
|
||||
<include name="languages/**" />
|
||||
</zipfileset>
|
||||
|
||||
<zipfileset src="${dir.lib}/ivy/bundle/guava.jar">
|
||||
<include name="com/google/**" />
|
||||
</zipfileset>
|
||||
|
||||
<!-- include classes and native libraries -->
|
||||
<zipfileset src="${dir.lib}/ivy/jar/jna.jar">
|
||||
<include name="com/sun/jna/**" />
|
||||
|
|
1
ivy.xml
1
ivy.xml
|
@ -23,6 +23,7 @@
|
|||
<dependency org="com.fifesoft" name="rsyntaxtextarea" rev="2.5.8" />
|
||||
<dependency org="net.sf.sevenzipjbinding" name="sevenzipjbinding" rev="9.20-2.00beta" />
|
||||
<dependency org="net.sf.sevenzipjbinding" name="sevenzipjbinding-all-platforms" rev="9.20-2.00beta" />
|
||||
<dependency org="com.optimaize.languagedetector" name="language-detector" rev="0.5" />
|
||||
|
||||
<!-- FileBot Scripting -->
|
||||
<dependency org="org.apache.ant" name="ant" rev="1.9.6" />
|
||||
|
|
|
@ -91,9 +91,12 @@ public class Language implements Serializable {
|
|||
};
|
||||
|
||||
public static Language getLanguage(String code) {
|
||||
ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName());
|
||||
if (code == null || code.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
ResourceBundle bundle = ResourceBundle.getBundle(Language.class.getName());
|
||||
String[] values = bundle.getString(code).split("\\t", 3);
|
||||
return new Language(code, values[0], values[1], values[2].split("\\t"));
|
||||
} catch (Exception e) {
|
||||
|
|
|
@ -8,6 +8,7 @@ import static net.filebot.format.ExpressionFormatMethods.*;
|
|||
import static net.filebot.hash.VerificationUtilities.*;
|
||||
import static net.filebot.media.MediaDetection.*;
|
||||
import static net.filebot.similarity.Normalization.*;
|
||||
import static net.filebot.subtitle.SubtitleUtilities.*;
|
||||
import static net.filebot.util.FileUtilities.*;
|
||||
import static net.filebot.util.StringUtilities.*;
|
||||
import static net.filebot.web.EpisodeFormat.*;
|
||||
|
@ -32,7 +33,6 @@ import java.util.TreeSet;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.filebot.Cache;
|
||||
import net.filebot.Language;
|
||||
import net.filebot.MediaTypes;
|
||||
import net.filebot.MetaAttributeView;
|
||||
|
@ -512,14 +512,19 @@ public class MediaBindingBean {
|
|||
}
|
||||
|
||||
@Define("lang")
|
||||
public Language detectSubtitleLanguage() throws Exception {
|
||||
public Language getSubtitleLanguage() throws Exception {
|
||||
Locale languageSuffix = releaseInfo.getLanguageSuffix(FileUtilities.getName(getMediaFile()));
|
||||
if (languageSuffix != null)
|
||||
if (languageSuffix != null) {
|
||||
return Language.getLanguage(languageSuffix);
|
||||
}
|
||||
|
||||
// require subtitle file
|
||||
if (!SUBTITLE_FILES.accept(getMediaFile())) {
|
||||
return null;
|
||||
// try to auto-detect subtitle language
|
||||
if (SUBTITLE_FILES.accept(getMediaFile())) {
|
||||
try {
|
||||
return Language.getLanguage(detectSubtitleLanguage(getMediaFile()));
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException("Failed to auto-detect subtitle language: " + e, e);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -1024,21 +1029,6 @@ public class MediaBindingBean {
|
|||
return bindings;
|
||||
}
|
||||
|
||||
private String crc32(File file) throws IOException, InterruptedException {
|
||||
// try to get checksum from cache
|
||||
Cache cache = Cache.getCache(Cache.EPHEMERAL);
|
||||
|
||||
String hash = cache.get(file, String.class);
|
||||
if (hash != null) {
|
||||
return hash;
|
||||
}
|
||||
|
||||
// compute and cache checksum
|
||||
hash = computeHash(file, HashType.SFV);
|
||||
cache.put(file, hash);
|
||||
return hash;
|
||||
}
|
||||
|
||||
private String getOriginalFileName(File file) {
|
||||
try {
|
||||
return getNameWithoutExtension(new MetaAttributes(file).getOriginalName());
|
||||
|
|
|
@ -10,6 +10,8 @@ import java.util.Map.Entry;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.filebot.Cache;
|
||||
|
||||
public final class VerificationUtilities {
|
||||
|
||||
/**
|
||||
|
@ -103,6 +105,21 @@ public final class VerificationUtilities {
|
|||
return hash.digest();
|
||||
}
|
||||
|
||||
public static String crc32(File file) throws IOException, InterruptedException {
|
||||
// try to get checksum from cache
|
||||
Cache cache = Cache.getCache(Cache.EPHEMERAL);
|
||||
|
||||
String hash = cache.get(file, String.class);
|
||||
if (hash != null) {
|
||||
return hash;
|
||||
}
|
||||
|
||||
// compute and cache checksum
|
||||
hash = computeHash(file, HashType.SFV);
|
||||
cache.put(file, hash);
|
||||
return hash;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dummy constructor to prevent instantiation.
|
||||
*/
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.Set;
|
|||
import java.util.TreeSet;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import net.filebot.Language;
|
||||
|
@ -48,6 +49,15 @@ import net.filebot.web.SubtitleDescriptor;
|
|||
import net.filebot.web.SubtitleProvider;
|
||||
import net.filebot.web.SubtitleSearchResult;
|
||||
|
||||
import com.optimaize.langdetect.DetectedLanguage;
|
||||
import com.optimaize.langdetect.LanguageDetector;
|
||||
import com.optimaize.langdetect.LanguageDetectorBuilder;
|
||||
import com.optimaize.langdetect.i18n.LdLocale;
|
||||
import com.optimaize.langdetect.ngram.NgramExtractors;
|
||||
import com.optimaize.langdetect.profiles.BuiltInLanguages;
|
||||
import com.optimaize.langdetect.profiles.LanguageProfile;
|
||||
import com.optimaize.langdetect.profiles.LanguageProfileReader;
|
||||
|
||||
public final class SubtitleUtilities {
|
||||
|
||||
public static Map<File, List<SubtitleDescriptor>> findSubtitleMatches(SubtitleProvider service, Collection<File> fileSet, String languageName, String forceQuery, boolean addOptions, boolean strict) throws Exception {
|
||||
|
@ -308,9 +318,9 @@ public final class SubtitleUtilities {
|
|||
SubRipWriter out = new SubRipWriter(buffer);
|
||||
|
||||
for (SubtitleElement it : decodeSubtitles(data)) {
|
||||
if (outputTimingOffset != 0)
|
||||
if (outputTimingOffset != 0) {
|
||||
it = new SubtitleElement(max(0, it.getStart() + outputTimingOffset), max(0, it.getEnd() + outputTimingOffset), it.getText());
|
||||
|
||||
}
|
||||
out.write(it);
|
||||
}
|
||||
|
||||
|
@ -386,6 +396,31 @@ public final class SubtitleUtilities {
|
|||
return new MemoryFile(descriptor.getPath(), data);
|
||||
}
|
||||
|
||||
public static String detectSubtitleLanguage(File file) throws IOException {
|
||||
MemoryFile subtitleFile = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
|
||||
String subtitleText = decodeSubtitles(subtitleFile).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n"));
|
||||
|
||||
// detect language
|
||||
List<DetectedLanguage> probabilities = createLanguageDetector().getProbabilities(subtitleText);
|
||||
|
||||
if (probabilities.size() > 0) {
|
||||
return probabilities.get(0).getLocale().getLanguage();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static LanguageDetectorBuilder languageDetector;
|
||||
|
||||
private static LanguageDetector createLanguageDetector() throws IOException {
|
||||
if (languageDetector == null) {
|
||||
// load all language profiles and build language detector
|
||||
List<LdLocale> languages = BuiltInLanguages.getLanguages().stream().filter(lc -> Language.getLanguage(lc.getLanguage()) != null).collect(Collectors.toList());
|
||||
List<LanguageProfile> languageProfiles = new LanguageProfileReader().readBuiltIn(languages);
|
||||
languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()).withProfiles(languageProfiles);
|
||||
}
|
||||
return languageDetector.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Dummy constructor to prevent instantiation.
|
||||
*/
|
||||
|
|
|
@ -175,16 +175,13 @@ public final class FileUtilities {
|
|||
}
|
||||
|
||||
public static byte[] readFile(File source) throws IOException {
|
||||
InputStream in = new FileInputStream(source);
|
||||
|
||||
try {
|
||||
long size = source.length();
|
||||
if (size < 0 || size > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("Unable to read file: " + source);
|
||||
}
|
||||
long size = source.length();
|
||||
if (size < 0 || size > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("Unable to read file: " + source);
|
||||
}
|
||||
|
||||
try (InputStream in = new FileInputStream(source)) {
|
||||
byte[] data = new byte[(int) size];
|
||||
|
||||
int position = 0;
|
||||
int read = 0;
|
||||
|
||||
|
@ -193,8 +190,6 @@ public final class FileUtilities {
|
|||
}
|
||||
|
||||
return data;
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue