diff --git a/build.xml b/build.xml index 3d2ee2e7..34a195b0 100644 --- a/build.xml +++ b/build.xml @@ -119,6 +119,10 @@ + + + + diff --git a/installer/webstart/filebot.lib.jnlp b/installer/webstart/filebot.lib.jnlp index d0a7e176..22f7189c 100644 --- a/installer/webstart/filebot.lib.jnlp +++ b/installer/webstart/filebot.lib.jnlp @@ -25,6 +25,7 @@ + diff --git a/lib/icu4j.jar b/lib/icu4j.jar new file mode 100644 index 00000000..a97d575b Binary files /dev/null and b/lib/icu4j.jar differ diff --git a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java b/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java index 4fda77dc..2d368da7 100644 --- a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java +++ b/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java @@ -5,14 +5,15 @@ package net.sourceforge.filebot.ui.panel.subtitle; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; +import java.io.StringReader; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; +import com.ibm.icu.text.CharsetDetector; + import net.sourceforge.filebot.subtitle.SubtitleElement; import net.sourceforge.filebot.subtitle.SubtitleFormat; import net.sourceforge.filebot.subtitle.SubtitleReader; @@ -22,12 +23,19 @@ import net.sourceforge.tuned.ByteBufferInputStream; final class SubtitleUtilities { /** - * Decode subtitle file even if extension is invalid. + * Detect charset and parse subtitle file even if extension is invalid */ public static List decode(MemoryFile file) throws IOException { - LinkedList priorityList = new LinkedList(); + // detect charset and read text content + CharsetDetector detector = new CharsetDetector(); + detector.enableInputFilter(true); + + detector.setText(new ByteBufferInputStream(file.getData())); + String textfile = detector.detect().getString(); // gather all formats, put likely formats first + LinkedList priorityList = new LinkedList(); + for (SubtitleFormat format : SubtitleFormat.values()) { if (format.getFilter().accept(file.getName())) { priorityList.addFirst(format); @@ -38,23 +46,19 @@ final class SubtitleUtilities { // decode subtitle file with the first reader that seems to work for (SubtitleFormat format : priorityList) { - InputStream data = new ByteBufferInputStream(file.getData()); - SubtitleReader reader = format.newReader(new InputStreamReader(data, "UTF-8")); + // reset reader to position 0 + SubtitleReader parser = format.newReader(new StringReader(textfile)); - try { - if (reader.hasNext()) { - // correct format found - List list = new ArrayList(500); - - // read subtitle file - while (reader.hasNext()) { - list.add(reader.next()); - } - - return list; + if (parser.hasNext()) { + // correct format found + List list = new ArrayList(500); + + // read subtitle file + while (parser.hasNext()) { + list.add(parser.next()); } - } finally { - reader.close(); + + return list; } }