From 3ded6a5628be3b4ef0e77cf9caefeab5c5705866 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Sat, 27 Jun 2009 16:02:31 +0000 Subject: [PATCH] + added subtitle package and parsers for some formats * added SubRip (.srt) support * added MicroDVD (.sub) support * added SubViewer (.sub) support * added SubStationAlpha (.ssa, .ass) support --- .../net/sourceforge/filebot/MediaTypes.java | 6 +- source/net/sourceforge/filebot/media.types | 23 +++-- .../filebot/subtitle/MicroDVDReader.java | 65 +++++++++++++ .../filebot/subtitle/SubRipReader.java | 50 ++++++++++ .../subtitle/SubStationAlphaReader.java | 80 ++++++++++++++++ .../filebot/subtitle/SubViewerReader.java | 43 +++++++++ .../filebot/subtitle/SubtitleElement.java | 40 ++++++++ .../filebot/subtitle/SubtitleFormat.java | 60 ++++++++++++ .../filebot/subtitle/SubtitleReader.java | 94 +++++++++++++++++++ .../filebot/subtitle/SubtitleTimeFormat.java | 59 ++++++++++++ .../sourceforge/filebot/FileBotTestSuite.java | 15 +-- .../filebot/subtitle/MicroDVDReaderTest.java | 49 ++++++++++ .../filebot/subtitle/SubRipReaderTest.java | 43 +++++++++ .../subtitle/SubtitleReaderTestSuite.java | 14 +++ 14 files changed, 624 insertions(+), 17 deletions(-) create mode 100644 source/net/sourceforge/filebot/subtitle/MicroDVDReader.java create mode 100644 source/net/sourceforge/filebot/subtitle/SubRipReader.java create mode 100644 source/net/sourceforge/filebot/subtitle/SubStationAlphaReader.java create mode 100644 source/net/sourceforge/filebot/subtitle/SubViewerReader.java create mode 100644 source/net/sourceforge/filebot/subtitle/SubtitleElement.java create mode 100644 source/net/sourceforge/filebot/subtitle/SubtitleFormat.java create mode 100644 source/net/sourceforge/filebot/subtitle/SubtitleReader.java create mode 100644 source/net/sourceforge/filebot/subtitle/SubtitleTimeFormat.java create mode 100644 test/net/sourceforge/filebot/subtitle/MicroDVDReaderTest.java create mode 100644 test/net/sourceforge/filebot/subtitle/SubRipReaderTest.java create mode 100644 test/net/sourceforge/filebot/subtitle/SubtitleReaderTestSuite.java diff --git a/source/net/sourceforge/filebot/MediaTypes.java b/source/net/sourceforge/filebot/MediaTypes.java index 63c8f354..73e5ad7f 100644 --- a/source/net/sourceforge/filebot/MediaTypes.java +++ b/source/net/sourceforge/filebot/MediaTypes.java @@ -59,15 +59,15 @@ public class MediaTypes { public List extensions(String name) { - List extensions = new ArrayList(); + List list = new ArrayList(); for (Type type : types) { if (type.name.startsWith(name)) { - addAll(extensions, type.extensions); + addAll(list, type.extensions); } } - return extensions; + return list; } } diff --git a/source/net/sourceforge/filebot/media.types b/source/net/sourceforge/filebot/media.types index 5001a528..ed4b880b 100644 --- a/source/net/sourceforge/filebot/media.types +++ b/source/net/sourceforge/filebot/media.types @@ -92,25 +92,34 @@ flv - + + rmvb + + + - + srt - - + + sub - + + sub + + + ssa ass - - + + smi + sami diff --git a/source/net/sourceforge/filebot/subtitle/MicroDVDReader.java b/source/net/sourceforge/filebot/subtitle/MicroDVDReader.java new file mode 100644 index 00000000..3d7daa14 --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/MicroDVDReader.java @@ -0,0 +1,65 @@ + +package net.sourceforge.filebot.subtitle; + + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Scanner; +import java.util.regex.Pattern; + + +public class MicroDVDReader extends SubtitleReader { + + private double fps = 23.976; + + + public MicroDVDReader(Scanner scanner) { + super(scanner); + } + + + @Override + public SubtitleElement readNext() throws Exception { + String line = scanner.nextLine(); + + List properties = new ArrayList(2); + int from = 0; + + while (from < line.length() && line.charAt(from) == '{') { + int to = line.indexOf('}', from + 1); + + // no more properties + if (to < from) + break; + + // extract property + properties.add(line.substring(from + 1, to)); + + // skip property + from = to + 1; + } + + if (properties.size() < 2) + return null; + + long startFrame = Long.parseLong(properties.get(0)); + long endFrame = Long.parseLong(properties.get(1)); + String text = line.substring(from).trim(); + + if (startFrame == 1 && endFrame == 1) { + // override fps + fps = Double.parseDouble(text); + + // ignore line + return null; + } + + // translate '|' to new lines + List lines = Arrays.asList(text.split(Pattern.quote("|"))); + + // convert frame interval to time interval + return new SubtitleElement(Math.round(startFrame * fps), Math.round(endFrame * fps), join(lines, "\n")); + } + +} diff --git a/source/net/sourceforge/filebot/subtitle/SubRipReader.java b/source/net/sourceforge/filebot/subtitle/SubRipReader.java new file mode 100644 index 00000000..f1fb7da0 --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubRipReader.java @@ -0,0 +1,50 @@ + +package net.sourceforge.filebot.subtitle; + + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Scanner; +import java.util.TimeZone; + + +public class SubRipReader extends SubtitleReader { + + private final DateFormat timeFormat; + + + public SubRipReader(Scanner scanner) { + super(scanner); + + // format used to parse time stamps (e.g. 00:02:26,407 --> 00:02:31,356) + timeFormat = new SimpleDateFormat("HH:mm:ss,SSS", Locale.ROOT); + timeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + + @Override + protected SubtitleElement readNext() throws Exception { + String number = scanner.nextLine(); + + if (!number.matches("\\d+")) + return null; + + String[] interval = scanner.nextLine().split("-->", 2); + + long t1 = timeFormat.parse(interval[0].trim()).getTime(); + long t2 = timeFormat.parse(interval[1].trim()).getTime(); + + List lines = new ArrayList(2); + + // read text + for (String line = scanner.nextLine(); !line.isEmpty() && scanner.hasNextLine(); line = scanner.nextLine()) { + lines.add(line); + } + + return new SubtitleElement(t1, t2, join(lines, "\n")); + } + +} diff --git a/source/net/sourceforge/filebot/subtitle/SubStationAlphaReader.java b/source/net/sourceforge/filebot/subtitle/SubStationAlphaReader.java new file mode 100644 index 00000000..e097cd99 --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubStationAlphaReader.java @@ -0,0 +1,80 @@ + +package net.sourceforge.filebot.subtitle; + + +import java.text.DateFormat; +import java.util.Arrays; +import java.util.HashMap; +import java.util.InputMismatchException; +import java.util.Map; +import java.util.Scanner; +import java.util.regex.Pattern; + + +public class SubStationAlphaReader extends SubtitleReader { + + private final DateFormat timeFormat = new SubtitleTimeFormat(); + + private Map format; + + + public SubStationAlphaReader(Scanner scanner) { + super(scanner); + } + + + private void readFormat() throws Exception { + // read format line (e.g. Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text) + String[] event = scanner.nextLine().split(":", 2); + + // sanity check + if (!event[0].equals("Format")) + throw new InputMismatchException("Illegal format header: " + Arrays.toString(event)); + + String[] columns = event[1].split(","); + + // map column name to column index + format = new HashMap(columns.length); + + for (int i = 0; i < columns.length; i++) { + format.put(columns[i].trim(), i); + } + } + + + @Override + public SubtitleElement readNext() throws Exception { + if (format == null) { + // move to [Events] sections + boolean found = false; + + while (!found && scanner.hasNext()) { + found = scanner.nextLine().equals("[Events]"); + } + + if (!found) { + throw new InputMismatchException("Cannot find [Events] section"); + } + + // read format header + readFormat(); + } + + // read next dialogue line + String[] event = scanner.nextLine().split(":", 2); + + // sanity check + if (!event[0].equals("Dialogue")) + throw new InputMismatchException("Illegal dialogue event: " + Arrays.toString(event)); + + // extract information + String[] row = event[1].split(",", format.size()); + + long start = timeFormat.parse(row[format.get("Start")]).getTime(); + long end = timeFormat.parse(row[format.get("End")]).getTime(); + String[] lines = row[format.get("Text")].trim().split(Pattern.quote("\\n")); + + return new SubtitleElement(start, end, join(Arrays.asList(lines), "\n")); + } + +} diff --git a/source/net/sourceforge/filebot/subtitle/SubViewerReader.java b/source/net/sourceforge/filebot/subtitle/SubViewerReader.java new file mode 100644 index 00000000..8619be72 --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubViewerReader.java @@ -0,0 +1,43 @@ + +package net.sourceforge.filebot.subtitle; + + +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.Scanner; +import java.util.regex.Pattern; + + +public class SubViewerReader extends SubtitleReader { + + private final DateFormat timeFormat = new SubtitleTimeFormat(); + + + public SubViewerReader(Scanner scanner) { + super(scanner); + } + + + @Override + protected SubtitleElement readNext() throws Exception { + // element starts with interval (e.g. 00:42:16.33,00:42:19.39) + String[] interval = scanner.nextLine().split(",", 2); + + if (interval.length < 2 || interval[0].startsWith("[")) + return null; + + long t1 = timeFormat.parse(interval[0]).getTime(); + long t2 = timeFormat.parse(interval[1]).getTime(); + + // append subtitle line + List lines = new ArrayList(2); + + for (String text : scanner.nextLine().split(Pattern.quote("[br]"))) { + lines.add(text); + } + + return new SubtitleElement(t1, t2, join(lines, "\n")); + } + +} diff --git a/source/net/sourceforge/filebot/subtitle/SubtitleElement.java b/source/net/sourceforge/filebot/subtitle/SubtitleElement.java new file mode 100644 index 00000000..6ed3780c --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubtitleElement.java @@ -0,0 +1,40 @@ + +package net.sourceforge.filebot.subtitle; + + +public class SubtitleElement { + + private final long start; + private final long end; + + private final String text; + + + public SubtitleElement(long start, long end, String text) { + this.start = start; + this.end = end; + this.text = text; + } + + + public long getStart() { + return start; + } + + + public long getEnd() { + return end; + } + + + public String getText() { + return text; + } + + + @Override + public String toString() { + return String.format("[%d, %d] %s", start, end, text); + } + +} diff --git a/source/net/sourceforge/filebot/subtitle/SubtitleFormat.java b/source/net/sourceforge/filebot/subtitle/SubtitleFormat.java new file mode 100644 index 00000000..24cbe2b0 --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubtitleFormat.java @@ -0,0 +1,60 @@ + +package net.sourceforge.filebot.subtitle; + + +import java.util.Scanner; + +import net.sourceforge.filebot.MediaTypes; +import net.sourceforge.tuned.FileUtilities.ExtensionFileFilter; + + +public enum SubtitleFormat { + + SubRip { + + @Override + public SubtitleReader newReader(Readable readable) { + return new SubRipReader(new Scanner(readable)); + } + }, + + MicroDVD { + + @Override + public SubtitleReader newReader(Readable readable) { + return new MicroDVDReader(new Scanner(readable)); + } + }, + + SubViewer { + + @Override + public SubtitleReader newReader(Readable readable) { + return new SubViewerReader(new Scanner(readable)); + } + }, + + SubStationAlpha { + + @Override + public SubtitleReader newReader(Readable readable) { + return new SubStationAlphaReader(new Scanner(readable)); + } + }, + + SAMI { + + @Override + public SubtitleReader newReader(Readable readable) { + throw new UnsupportedOperationException("SAMI reader not implemented"); + } + }; + + public abstract SubtitleReader newReader(Readable readable); + + + public ExtensionFileFilter filter() { + return MediaTypes.getDefault().filter("subtitle/" + this); + } + +} diff --git a/source/net/sourceforge/filebot/subtitle/SubtitleReader.java b/source/net/sourceforge/filebot/subtitle/SubtitleReader.java new file mode 100644 index 00000000..781addbc --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubtitleReader.java @@ -0,0 +1,94 @@ + +package net.sourceforge.filebot.subtitle; + + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Scanner; +import java.util.logging.Level; +import java.util.logging.Logger; + + +public abstract class SubtitleReader implements Iterator, Closeable { + + protected final Scanner scanner; + + protected SubtitleElement current; + + + public SubtitleReader(File file) throws FileNotFoundException { + // don't use new Scanner(File) because of BUG 6368019 (http://bugs.sun.com/view_bug.do?bug_id=6368019) + this(new Scanner(new FileInputStream(file), "UTF-8")); + } + + + public SubtitleReader(Scanner scanner) { + this.scanner = scanner; + } + + + protected abstract SubtitleElement readNext() throws Exception; + + + @Override + public boolean hasNext() { + // find next element + while (current == null && scanner.hasNextLine()) { + try { + current = readNext(); + } catch (Exception e) { + // log and ignore + Logger.getLogger(getClass().getName()).log(Level.WARNING, e.toString(), e); + } + } + + return current != null; + } + + + @Override + public SubtitleElement next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + try { + return current; + } finally { + current = null; + } + } + + + protected String join(Iterable values, String delimiter) { + StringBuilder sb = new StringBuilder(); + + for (Iterator iterator = values.iterator(); iterator.hasNext();) { + sb.append(iterator.next()); + + if (iterator.hasNext()) { + sb.append(delimiter); + } + } + + return sb.toString(); + } + + + @Override + public void close() throws IOException { + scanner.close(); + } + + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + +} diff --git a/source/net/sourceforge/filebot/subtitle/SubtitleTimeFormat.java b/source/net/sourceforge/filebot/subtitle/SubtitleTimeFormat.java new file mode 100644 index 00000000..d097e8c1 --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubtitleTimeFormat.java @@ -0,0 +1,59 @@ + +package net.sourceforge.filebot.subtitle; + + +import java.text.DateFormat; +import java.text.FieldPosition; +import java.text.ParsePosition; +import java.util.Calendar; +import java.util.Date; +import java.util.Locale; +import java.util.Scanner; +import java.util.TimeZone; + + +class SubtitleTimeFormat extends DateFormat { + + public SubtitleTimeFormat() { + // calendar without any kind of special handling for time zone and daylight saving time + calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); + } + + + @Override + public StringBuffer format(Date date, StringBuffer sb, FieldPosition pos) { + // e.g. 1:42:52.42 + calendar.setTime(date); + + sb.append(String.format("%02d", calendar.get(Calendar.HOUR_OF_DAY))); + sb.append(':').append(String.format("%02d", calendar.get(Calendar.MINUTE))); + sb.append(':').append(String.format("%02d", calendar.get(Calendar.SECOND))); + + String millis = String.format("%03d", calendar.get(Calendar.MILLISECOND)); + sb.append('.').append(millis.substring(0, 2)); + + return sb; + } + + + @Override + public Date parse(String source, ParsePosition pos) { + Scanner scanner = new Scanner(source).useDelimiter(":|\\."); + + // reset state + calendar.clear(); + + // handle hours:minutes:seconds + calendar.set(Calendar.HOUR_OF_DAY, scanner.nextInt()); + calendar.set(Calendar.MINUTE, scanner.nextInt()); + calendar.set(Calendar.SECOND, scanner.nextInt()); + + // handle hundredth seconds + calendar.set(Calendar.MILLISECOND, scanner.nextInt() * 10); + + // update position + pos.setIndex(scanner.match().end()); + + return calendar.getTime(); + } +} diff --git a/test/net/sourceforge/filebot/FileBotTestSuite.java b/test/net/sourceforge/filebot/FileBotTestSuite.java index ab7d07f7..c182ec10 100644 --- a/test/net/sourceforge/filebot/FileBotTestSuite.java +++ b/test/net/sourceforge/filebot/FileBotTestSuite.java @@ -2,19 +2,20 @@ package net.sourceforge.filebot; -import net.sourceforge.filebot.format.ExpressionFormatTest; -import net.sourceforge.filebot.hash.VerificationFormatTest; -import net.sourceforge.filebot.similarity.SimilarityTestSuite; -import net.sourceforge.filebot.ui.panel.rename.MatchModelTest; -import net.sourceforge.filebot.web.WebTestSuite; - import org.junit.runner.RunWith; import org.junit.runners.Suite; import org.junit.runners.Suite.SuiteClasses; +import net.sourceforge.filebot.format.ExpressionFormatTest; +import net.sourceforge.filebot.hash.VerificationFormatTest; +import net.sourceforge.filebot.similarity.SimilarityTestSuite; +import net.sourceforge.filebot.subtitle.SubtitleReaderTestSuite; +import net.sourceforge.filebot.ui.panel.rename.MatchModelTest; +import net.sourceforge.filebot.web.WebTestSuite; + @RunWith(Suite.class) -@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class }) +@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, SubtitleReaderTestSuite.class }) public class FileBotTestSuite { } diff --git a/test/net/sourceforge/filebot/subtitle/MicroDVDReaderTest.java b/test/net/sourceforge/filebot/subtitle/MicroDVDReaderTest.java new file mode 100644 index 00000000..53fc9838 --- /dev/null +++ b/test/net/sourceforge/filebot/subtitle/MicroDVDReaderTest.java @@ -0,0 +1,49 @@ + +package net.sourceforge.filebot.subtitle; + + +import static org.junit.Assert.*; + +import java.util.*; + +import org.junit.*; + + +public class MicroDVDReaderTest { + + @Test + public void parse() throws Exception { + MicroDVDReader reader = new MicroDVDReader(new Scanner("{856}{900}what's the plan?")); + + SubtitleElement element = reader.next(); + + assertEquals(856 * 23.976, element.getStart(), 1); + assertEquals(900 * 23.976, element.getEnd(), 1); + assertEquals("what's the plan?", element.getText()); + } + + + @Test + public void fps() throws Exception { + MicroDVDReader reader = new MicroDVDReader(new Scanner("{1}{1}100\n{300}{400} trim me ")); + + SubtitleElement element = reader.next(); + + assertEquals(300 * 100, element.getStart(), 0); + assertEquals(400 * 100, element.getEnd(), 0); + assertEquals("trim me", element.getText()); + } + + + @Test + public void newline() throws Exception { + MicroDVDReader reader = new MicroDVDReader(new Scanner("\n\n{300}{400} l1|l2|l3| \n\n")); + + String[] lines = reader.next().getText().split("\\n"); + + assertEquals(3, lines.length); + assertEquals("l1", lines[0]); + assertEquals("l2", lines[1]); + assertEquals("l3", lines[2]); + } +} diff --git a/test/net/sourceforge/filebot/subtitle/SubRipReaderTest.java b/test/net/sourceforge/filebot/subtitle/SubRipReaderTest.java new file mode 100644 index 00000000..4952766e --- /dev/null +++ b/test/net/sourceforge/filebot/subtitle/SubRipReaderTest.java @@ -0,0 +1,43 @@ + +package net.sourceforge.filebot.subtitle; + + +import static org.junit.Assert.*; + +import java.io.InputStream; +import java.net.URL; +import java.util.LinkedList; +import java.util.Scanner; +import java.util.zip.GZIPInputStream; + +import org.junit.Test; + + +public class SubRipReaderTest { + + @Test + public void parse() throws Exception { + LinkedList list = new LinkedList(); + + URL resource = new URL("http://www.opensubtitles.org/en/download/file/1951733951.gz"); + InputStream stream = new GZIPInputStream(resource.openStream()); + + SubRipReader reader = new SubRipReader(new Scanner(stream, "UTF-8")); + + try { + while (reader.hasNext()) { + list.add(reader.next()); + } + } finally { + reader.close(); + } + + assertEquals(499, list.size(), 0); + + assertEquals(3455, list.getFirst().getStart(), 0); + assertEquals(6799, list.getFirst().getEnd(), 0); + + assertEquals("Come with me if you want to live.", list.get(253).getText()); + } + +} diff --git a/test/net/sourceforge/filebot/subtitle/SubtitleReaderTestSuite.java b/test/net/sourceforge/filebot/subtitle/SubtitleReaderTestSuite.java new file mode 100644 index 00000000..e7fd27d0 --- /dev/null +++ b/test/net/sourceforge/filebot/subtitle/SubtitleReaderTestSuite.java @@ -0,0 +1,14 @@ + +package net.sourceforge.filebot.subtitle; + + +import org.junit.runner.RunWith; +import org.junit.runners.Suite; +import org.junit.runners.Suite.SuiteClasses; + + +@RunWith(Suite.class) +@SuiteClasses( { SubRipReaderTest.class, MicroDVDReaderTest.class }) +public class SubtitleReaderTestSuite { + +}