* detect charset when parsing subtitles instead of assuming it's UTF-8
* added ICU4J library to build
This commit is contained in:
parent
5c6e879f6c
commit
7c2c574940
|
@ -119,6 +119,10 @@
|
||||||
<include name="org/codehaus/groovy/**" />
|
<include name="org/codehaus/groovy/**" />
|
||||||
<include name="META-INF/dgminfo" />
|
<include name="META-INF/dgminfo" />
|
||||||
</zipfileset>
|
</zipfileset>
|
||||||
|
|
||||||
|
<zipfileset src="${dir.lib}/icu4j.jar">
|
||||||
|
<include name="com/ibm/icu/text/**" />
|
||||||
|
</zipfileset>
|
||||||
|
|
||||||
<zipfileset src="${dir.lib}/sublight-ws.jar">
|
<zipfileset src="${dir.lib}/sublight-ws.jar">
|
||||||
<include name="net/sublight/webservice/**" />
|
<include name="net/sublight/webservice/**" />
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
<jar href="sublight-ws.jar" />
|
<jar href="sublight-ws.jar" />
|
||||||
<jar href="xmlrpc.jar" />
|
<jar href="xmlrpc.jar" />
|
||||||
<jar href="json-simple.jar" />
|
<jar href="json-simple.jar" />
|
||||||
|
<jar href="icu4j.jar" />
|
||||||
</resources>
|
</resources>
|
||||||
|
|
||||||
<component-desc />
|
<component-desc />
|
||||||
|
|
Binary file not shown.
|
@ -5,14 +5,15 @@ package net.sourceforge.filebot.ui.panel.subtitle;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.StringReader;
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.ibm.icu.text.CharsetDetector;
|
||||||
|
|
||||||
import net.sourceforge.filebot.subtitle.SubtitleElement;
|
import net.sourceforge.filebot.subtitle.SubtitleElement;
|
||||||
import net.sourceforge.filebot.subtitle.SubtitleFormat;
|
import net.sourceforge.filebot.subtitle.SubtitleFormat;
|
||||||
import net.sourceforge.filebot.subtitle.SubtitleReader;
|
import net.sourceforge.filebot.subtitle.SubtitleReader;
|
||||||
|
@ -22,12 +23,19 @@ import net.sourceforge.tuned.ByteBufferInputStream;
|
||||||
final class SubtitleUtilities {
|
final class SubtitleUtilities {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode subtitle file even if extension is invalid.
|
* Detect charset and parse subtitle file even if extension is invalid
|
||||||
*/
|
*/
|
||||||
public static List<SubtitleElement> decode(MemoryFile file) throws IOException {
|
public static List<SubtitleElement> decode(MemoryFile file) throws IOException {
|
||||||
LinkedList<SubtitleFormat> priorityList = new LinkedList<SubtitleFormat>();
|
// detect charset and read text content
|
||||||
|
CharsetDetector detector = new CharsetDetector();
|
||||||
|
detector.enableInputFilter(true);
|
||||||
|
|
||||||
|
detector.setText(new ByteBufferInputStream(file.getData()));
|
||||||
|
String textfile = detector.detect().getString();
|
||||||
|
|
||||||
// gather all formats, put likely formats first
|
// gather all formats, put likely formats first
|
||||||
|
LinkedList<SubtitleFormat> priorityList = new LinkedList<SubtitleFormat>();
|
||||||
|
|
||||||
for (SubtitleFormat format : SubtitleFormat.values()) {
|
for (SubtitleFormat format : SubtitleFormat.values()) {
|
||||||
if (format.getFilter().accept(file.getName())) {
|
if (format.getFilter().accept(file.getName())) {
|
||||||
priorityList.addFirst(format);
|
priorityList.addFirst(format);
|
||||||
|
@ -38,23 +46,19 @@ final class SubtitleUtilities {
|
||||||
|
|
||||||
// decode subtitle file with the first reader that seems to work
|
// decode subtitle file with the first reader that seems to work
|
||||||
for (SubtitleFormat format : priorityList) {
|
for (SubtitleFormat format : priorityList) {
|
||||||
InputStream data = new ByteBufferInputStream(file.getData());
|
// create a fresh reader over the decoded text so that each format starts parsing from the beginning
|
||||||
SubtitleReader reader = format.newReader(new InputStreamReader(data, "UTF-8"));
|
SubtitleReader parser = format.newReader(new StringReader(textfile));
|
||||||
|
|
||||||
try {
|
if (parser.hasNext()) {
|
||||||
if (reader.hasNext()) {
|
// correct format found
|
||||||
// correct format found
|
List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);
|
||||||
List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);
|
|
||||||
|
// read subtitle file
|
||||||
// read subtitle file
|
while (parser.hasNext()) {
|
||||||
while (reader.hasNext()) {
|
list.add(parser.next());
|
||||||
list.add(reader.next());
|
|
||||||
}
|
|
||||||
|
|
||||||
return list;
|
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
reader.close();
|
return list;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue