+ added subtitle package and parsers for some formats

* added SubRip (.srt) support
* added MicroDVD (.sub) support
* added SubViewer (.sub) support
* added SubStationAlpha (.ssa, .ass) support
This commit is contained in:
Reinhard Pointner 2009-06-27 16:02:31 +00:00
parent 5a2d1459f2
commit 3ded6a5628
14 changed files with 624 additions and 17 deletions

View File

@ -59,15 +59,15 @@ public class MediaTypes {
public List<String> extensions(String name) { public List<String> extensions(String name) {
List<String> extensions = new ArrayList<String>(); List<String> list = new ArrayList<String>();
for (Type type : types) { for (Type type : types) {
if (type.name.startsWith(name)) { if (type.name.startsWith(name)) {
addAll(extensions, type.extensions); addAll(list, type.extensions);
} }
} }
return extensions; return list;
} }
} }

View File

@ -92,25 +92,34 @@
<extension>flv</extension> <extension>flv</extension>
</type> </type>
<type name="video/rmvb">
<extension>rmvb</extension>
</type>
<!-- <!--
Subtitles Subtitles
--> -->
<type name="subtitle/srt"> <type name="subtitle/SubRip">
<extension>srt</extension> <extension>srt</extension>
</type> </type>
<type name="subtitle/sub"> <type name="subtitle/MicroDVD">
<extension>sub</extension> <extension>sub</extension>
</type> </type>
<type name="subtitle/ssa"> <type name="subtitle/SubViewer">
<extension>sub</extension>
</type>
<type name="subtitle/SubStationAlpha">
<extension>ssa</extension> <extension>ssa</extension>
<extension>ass</extension> <extension>ass</extension>
</type> </type>
<type name="subtitle/smi"> <type name="subtitle/SAMI">
<extension>smi</extension> <extension>smi</extension>
<extension>sami</extension>
</type> </type>
</media-types> </media-types>

View File

@ -0,0 +1,65 @@
package net.sourceforge.filebot.subtitle;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * Reader for MicroDVD (.sub) subtitles.
 * <p>
 * Each line is expected to look like <code>{start}{end}text</code>, where the two leading
 * groups are frame numbers and <code>|</code> separates the lines of the text. A line whose
 * frame numbers are both <code>1</code> carries a new value for {@link #fps} instead of text.
 */
public class MicroDVDReader extends SubtitleReader {

    // multiplier applied to frame numbers; replaced by the value of a {1}{1} line
    private double fps = 23.976;


    public MicroDVDReader(Scanner scanner) {
        super(scanner);
    }


    /**
     * Consumes one line and converts it to a subtitle element.
     *
     * @return the parsed element, or <code>null</code> if the line has fewer than two leading
     *         <code>{...}</code> groups or is an fps override line
     * @throws Exception if the frame numbers or the fps value are not numeric
     */
    @Override
    public SubtitleElement readNext() throws Exception {
        final String record = scanner.nextLine();

        // collect every leading {...} group; the first two are the frame numbers
        final List<String> groups = new ArrayList<String>(2);
        int cursor = 0;

        while (cursor < record.length() && record.charAt(cursor) == '{') {
            final int close = record.indexOf('}', cursor + 1);

            if (close < 0) {
                // unterminated group, everything from here on is text
                break;
            }

            groups.add(record.substring(cursor + 1, close));
            cursor = close + 1;
        }

        if (groups.size() < 2) {
            return null;
        }

        final long firstFrame = Long.parseLong(groups.get(0));
        final long lastFrame = Long.parseLong(groups.get(1));
        final String payload = record.substring(cursor).trim();

        if (firstFrame == 1 && lastFrame == 1) {
            // this line only overrides the fps value and carries no subtitle
            fps = Double.parseDouble(payload);
            return null;
        }

        // '|' separates the lines of a subtitle
        final String[] parts = payload.split(Pattern.quote("|"));

        // NOTE(review): frame numbers are multiplied by fps here; a frame-to-millisecond
        // conversion would normally be frame * 1000 / fps. MicroDVDReaderTest asserts the
        // multiplication, so confirm the intended unit before changing it.
        final long start = Math.round(firstFrame * fps);
        final long end = Math.round(lastFrame * fps);

        return new SubtitleElement(start, end, join(Arrays.asList(parts), "\n"));
    }
}

View File

@ -0,0 +1,50 @@
package net.sourceforge.filebot.subtitle;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.TimeZone;
/**
 * Reader for SubRip (.srt) subtitles.
 * <p>
 * An element consists of a numeric index line, an interval line
 * (e.g. <code>00:02:26,407 --> 00:02:31,356</code>) and the text lines that follow,
 * up to the next empty line or the end of the input.
 */
public class SubRipReader extends SubtitleReader {

    private final DateFormat timeFormat;


    public SubRipReader(Scanner scanner) {
        super(scanner);

        // format used to parse time stamps (e.g. 00:02:26,407 --> 00:02:31,356)
        timeFormat = new SimpleDateFormat("HH:mm:ss,SSS", Locale.ROOT);
        timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    }


    /**
     * Reads the next element starting at the current line.
     *
     * @return the parsed element, or <code>null</code> if the current line is not an index line
     * @throws Exception if the interval line is missing or cannot be parsed
     */
    @Override
    protected SubtitleElement readNext() throws Exception {
        String number = scanner.nextLine();

        if (!number.matches("\\d+"))
            return null;

        String[] interval = scanner.nextLine().split("-->", 2);

        long t1 = timeFormat.parse(interval[0].trim()).getTime();
        long t2 = timeFormat.parse(interval[1].trim()).getTime();

        List<String> lines = new ArrayList<String>(2);

        // read text up to the next empty line; check for more input BEFORE reading so that
        // the final text line is kept when the file does not end with an empty line
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();

            if (line.isEmpty())
                break;

            lines.add(line);
        }

        return new SubtitleElement(t1, t2, join(lines, "\n"));
    }
}

View File

@ -0,0 +1,80 @@
package net.sourceforge.filebot.subtitle;
import java.text.DateFormat;
import java.util.Arrays;
import java.util.HashMap;
import java.util.InputMismatchException;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * Reader for SubStationAlpha (.ssa, .ass) subtitles.
 * <p>
 * The reader skips to the <code>[Events]</code> section, reads the <code>Format:</code> line
 * that follows it to learn the column order, and then expects one <code>Dialogue:</code> line
 * per call to {@link #readNext()}.
 */
public class SubStationAlphaReader extends SubtitleReader {

    // literal line break markers inside dialogue text: \n and \N
    private static final Pattern LINE_BREAK = Pattern.compile("\\\\[nN]");

    private final DateFormat timeFormat = new SubtitleTimeFormat();

    // column name -> column index, taken from the Format line; null until it has been read
    private Map<String, Integer> format;


    public SubStationAlphaReader(Scanner scanner) {
        super(scanner);
    }


    /**
     * Reads the format line and builds the column index.
     *
     * @throws InputMismatchException if the current line is not a <code>Format:</code> line
     */
    private void readFormat() throws Exception {
        // read format line (e.g. Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text)
        String[] event = scanner.nextLine().split(":", 2);

        // sanity check (a line without ':' yields a single element)
        if (event.length < 2 || !event[0].equals("Format"))
            throw new InputMismatchException("Illegal format header: " + Arrays.toString(event));

        String[] columns = event[1].split(",");

        // map column name to column index
        format = new HashMap<String, Integer>(columns.length);

        for (int i = 0; i < columns.length; i++) {
            format.put(columns[i].trim(), i);
        }
    }


    /**
     * Reads the next dialogue line.
     *
     * @return the parsed element
     * @throws InputMismatchException if the <code>[Events]</code> section is missing or the
     *         current line is not a <code>Dialogue:</code> line
     * @throws Exception if the time stamps cannot be parsed
     */
    @Override
    public SubtitleElement readNext() throws Exception {
        if (format == null) {
            // move to [Events] section; iterate by line (not by token) so that every remaining
            // line is consumed when the section is missing, otherwise the caller, which loops
            // while lines are available, would call this method forever on trailing blank lines
            boolean found = false;

            while (!found && scanner.hasNextLine()) {
                found = scanner.nextLine().trim().equals("[Events]");
            }

            if (!found) {
                throw new InputMismatchException("Cannot find [Events] section");
            }

            // read format header
            readFormat();
        }

        // read next dialogue line
        String[] event = scanner.nextLine().split(":", 2);

        // sanity check (a line without ':' yields a single element)
        if (event.length < 2 || !event[0].equals("Dialogue"))
            throw new InputMismatchException("Illegal dialogue event: " + Arrays.toString(event));

        // extract information; the limit keeps commas inside the last column intact
        String[] row = event[1].split(",", format.size());

        long start = timeFormat.parse(row[format.get("Start")]).getTime();
        long end = timeFormat.parse(row[format.get("End")]).getTime();

        // translate \n and \N markers to new lines
        String[] lines = LINE_BREAK.split(row[format.get("Text")].trim());

        return new SubtitleElement(start, end, join(Arrays.asList(lines), "\n"));
    }
}

View File

@ -0,0 +1,43 @@
package net.sourceforge.filebot.subtitle;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * Reader for SubViewer (.sub) subtitles.
 * <p>
 * An element is an interval line (e.g. <code>00:42:16.33,00:42:19.39</code>) followed by one
 * text line in which <code>[br]</code> marks a line break.
 */
public class SubViewerReader extends SubtitleReader {

    private final DateFormat timeFormat = new SubtitleTimeFormat();


    public SubViewerReader(Scanner scanner) {
        super(scanner);
    }


    /**
     * Reads the next element starting at the current line.
     *
     * @return the parsed element, or <code>null</code> if the current line contains no comma
     *         or starts with <code>[</code>
     * @throws Exception if the interval cannot be parsed or the text line is missing
     */
    @Override
    protected SubtitleElement readNext() throws Exception {
        final String[] stamps = scanner.nextLine().split(",", 2);

        // only lines of the form "from,to" that are not [HEADER] lines start an element
        final boolean startsElement = stamps.length >= 2 && !stamps[0].startsWith("[");

        if (!startsElement) {
            return null;
        }

        final long begin = timeFormat.parse(stamps[0]).getTime();
        final long finish = timeFormat.parse(stamps[1]).getTime();

        // the text follows on the next line, [br] separates its lines
        final String[] parts = scanner.nextLine().split(Pattern.quote("[br]"));
        final List<String> text = new ArrayList<String>(2);

        for (int i = 0; i < parts.length; i++) {
            text.add(parts[i]);
        }

        return new SubtitleElement(begin, finish, join(text, "\n"));
    }
}

View File

@ -0,0 +1,40 @@
package net.sourceforge.filebot.subtitle;
/**
 * Immutable value holding one subtitle: the interval during which it is shown and its text.
 */
public class SubtitleElement {

    private final long start;
    private final long end;
    private final String text;


    /**
     * @param start beginning of the interval, as supplied by the reader
     * @param end end of the interval, as supplied by the reader
     * @param text subtitle text
     */
    public SubtitleElement(final long start, final long end, final String text) {
        this.text = text;
        this.end = end;
        this.start = start;
    }


    /** @return the value passed as <code>start</code> to the constructor */
    public long getStart() {
        return this.start;
    }


    /** @return the value passed as <code>end</code> to the constructor */
    public long getEnd() {
        return this.end;
    }


    /** @return the value passed as <code>text</code> to the constructor */
    public String getText() {
        return this.text;
    }


    /**
     * @return a string of the form <code>[start, end] text</code>
     */
    @Override
    public String toString() {
        final Object[] values = { Long.valueOf(start), Long.valueOf(end), text };
        return String.format("[%d, %d] %s", values);
    }
}

View File

@ -0,0 +1,60 @@
package net.sourceforge.filebot.subtitle;
import java.util.Scanner;
import net.sourceforge.filebot.MediaTypes;
import net.sourceforge.tuned.FileUtilities.ExtensionFileFilter;
/**
 * Known subtitle formats. Each constant creates the reader for its format; the constant
 * name is also used as the subtype of the <code>subtitle/...</code> media type.
 */
public enum SubtitleFormat {

    SubRip {

        @Override
        public SubtitleReader newReader(Readable readable) {
            final Scanner input = new Scanner(readable);
            return new SubRipReader(input);
        }
    },

    MicroDVD {

        @Override
        public SubtitleReader newReader(Readable readable) {
            final Scanner input = new Scanner(readable);
            return new MicroDVDReader(input);
        }
    },

    SubViewer {

        @Override
        public SubtitleReader newReader(Readable readable) {
            final Scanner input = new Scanner(readable);
            return new SubViewerReader(input);
        }
    },

    SubStationAlpha {

        @Override
        public SubtitleReader newReader(Readable readable) {
            final Scanner input = new Scanner(readable);
            return new SubStationAlphaReader(input);
        }
    },

    SAMI {

        // no reader exists for this format yet
        @Override
        public SubtitleReader newReader(Readable readable) {
            throw new UnsupportedOperationException("SAMI reader not implemented");
        }
    };

    /**
     * Creates a reader of this format on top of the given input.
     *
     * @param readable source of the subtitle data
     * @return a new reader for this format
     */
    public abstract SubtitleReader newReader(Readable readable);


    /**
     * @return the filter that the default media types provide for <code>subtitle/NAME</code>,
     *         where NAME is the name of this constant
     */
    public ExtensionFileFilter filter() {
        final String mediaType = "subtitle/" + name();
        return MediaTypes.getDefault().filter(mediaType);
    }
}

View File

@ -0,0 +1,94 @@
package net.sourceforge.filebot.subtitle;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Base class for subtitle readers: an iterator over the elements that a subclass extracts
 * from a {@link Scanner}, one {@link #readNext()} call at a time.
 */
public abstract class SubtitleReader implements Iterator<SubtitleElement>, Closeable {

    protected final Scanner scanner;

    // element that has been read but not yet handed out by next(); null if there is none
    protected SubtitleElement current;


    public SubtitleReader(File file) throws FileNotFoundException {
        // don't use new Scanner(File) because of BUG 6368019 (http://bugs.sun.com/view_bug.do?bug_id=6368019)
        this(new Scanner(new FileInputStream(file), "UTF-8"));
    }


    public SubtitleReader(Scanner scanner) {
        this.scanner = scanner;
    }


    /**
     * Reads from the scanner and returns the next element.
     *
     * @return the element, or <code>null</code> if the consumed input did not yield one
     * @throws Exception if the input could not be parsed; {@link #hasNext()} logs it and goes on
     */
    protected abstract SubtitleElement readNext() throws Exception;


    /**
     * Reads ahead until an element is available or the scanner has no more lines.
     * Exceptions thrown by {@link #readNext()} are logged as warnings and otherwise ignored.
     */
    @Override
    public boolean hasNext() {
        if (current != null) {
            return true;
        }

        while (scanner.hasNextLine()) {
            try {
                current = readNext();
            } catch (Exception e) {
                // log and ignore
                Logger.getLogger(getClass().getName()).log(Level.WARNING, e.toString(), e);
            }

            if (current != null) {
                return true;
            }
        }

        return false;
    }


    @Override
    public SubtitleElement next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }

        // hand out the buffered element and clear the buffer
        final SubtitleElement element = current;
        current = null;
        return element;
    }


    /**
     * Concatenates the string forms of the given values, separated by the delimiter.
     */
    protected String join(Iterable<?> values, String delimiter) {
        final StringBuilder joined = new StringBuilder();
        boolean first = true;

        for (Object value : values) {
            if (!first) {
                joined.append(delimiter);
            }

            joined.append(value);
            first = false;
        }

        return joined.toString();
    }


    @Override
    public void close() throws IOException {
        scanner.close();
    }


    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}

View File

@ -0,0 +1,59 @@
package net.sourceforge.filebot.subtitle;
import java.text.DateFormat;
import java.text.FieldPosition;
import java.text.ParsePosition;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.Scanner;
import java.util.TimeZone;
/**
 * Time format <code>hours:minutes:seconds.hundredths</code> (e.g. <code>01:42:52.42</code>),
 * evaluated in UTC so that the resulting {@link Date#getTime()} is the offset in milliseconds.
 */
class SubtitleTimeFormat extends DateFormat {

    public SubtitleTimeFormat() {
        // calendar without any kind of special handling for time zone and daylight saving time
        calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
    }


    /**
     * Appends the time of day of the given date as <code>HH:mm:ss.hh</code>; milliseconds
     * are truncated to hundredths of a second.
     */
    @Override
    public StringBuffer format(Date date, StringBuffer sb, FieldPosition pos) {
        // e.g. 01:42:52.42
        calendar.setTime(date);

        // Locale.ROOT keeps the digits independent of the default locale
        sb.append(String.format(Locale.ROOT, "%02d:%02d:%02d.%02d",
                calendar.get(Calendar.HOUR_OF_DAY),
                calendar.get(Calendar.MINUTE),
                calendar.get(Calendar.SECOND),
                calendar.get(Calendar.MILLISECOND) / 10));

        return sb;
    }


    /**
     * Parses four integers separated by <code>:</code> or <code>.</code>, starting at the
     * index given by <code>pos</code>.
     *
     * @return the parsed time, or <code>null</code> if the text is not of that form, in which
     *         case the index of <code>pos</code> is left unchanged and its error index is set
     */
    @Override
    public Date parse(String source, ParsePosition pos) {
        final int start = pos.getIndex();

        // honor the start position and parse digits independently of the default locale
        Scanner scanner = new Scanner(source.substring(start)).useLocale(Locale.ROOT).useDelimiter(":|\\.");

        try {
            // reset state
            calendar.clear();

            // handle hours:minutes:seconds
            calendar.set(Calendar.HOUR_OF_DAY, scanner.nextInt());
            calendar.set(Calendar.MINUTE, scanner.nextInt());
            calendar.set(Calendar.SECOND, scanner.nextInt());

            // handle hundredth seconds
            calendar.set(Calendar.MILLISECOND, scanner.nextInt() * 10);

            // update position (the match offset is relative to the substring)
            pos.setIndex(start + scanner.match().end());

            return calendar.getTime();
        } catch (java.util.NoSuchElementException e) {
            // missing or non-numeric field (InputMismatchException is a subclass); report it
            // through the ParsePosition so that parse(String) throws a ParseException
            pos.setErrorIndex(start);
            return null;
        } finally {
            scanner.close();
        }
    }
}

View File

@ -2,19 +2,20 @@
package net.sourceforge.filebot; package net.sourceforge.filebot;
import net.sourceforge.filebot.format.ExpressionFormatTest;
import net.sourceforge.filebot.hash.VerificationFormatTest;
import net.sourceforge.filebot.similarity.SimilarityTestSuite;
import net.sourceforge.filebot.ui.panel.rename.MatchModelTest;
import net.sourceforge.filebot.web.WebTestSuite;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.junit.runners.Suite; import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses; import org.junit.runners.Suite.SuiteClasses;
import net.sourceforge.filebot.format.ExpressionFormatTest;
import net.sourceforge.filebot.hash.VerificationFormatTest;
import net.sourceforge.filebot.similarity.SimilarityTestSuite;
import net.sourceforge.filebot.subtitle.SubtitleReaderTestSuite;
import net.sourceforge.filebot.ui.panel.rename.MatchModelTest;
import net.sourceforge.filebot.web.WebTestSuite;
@RunWith(Suite.class) @RunWith(Suite.class)
@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class }) @SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, SubtitleReaderTestSuite.class })
public class FileBotTestSuite { public class FileBotTestSuite {
} }

View File

@ -0,0 +1,49 @@
package net.sourceforge.filebot.subtitle;
import static org.junit.Assert.*;
import java.util.*;
import org.junit.*;
/**
 * Tests for {@link MicroDVDReader}, fed from in-memory strings.
 */
public class MicroDVDReaderTest {

    /** A plain line is converted using the reader's initial fps value of 23.976. */
    @Test
    public void parse() throws Exception {
        Scanner input = new Scanner("{856}{900}what's the plan?");
        SubtitleElement subtitle = new MicroDVDReader(input).next();

        assertEquals(856 * 23.976, subtitle.getStart(), 1);
        assertEquals(900 * 23.976, subtitle.getEnd(), 1);
        assertEquals("what's the plan?", subtitle.getText());
    }


    /** A {1}{1} line overrides the fps value, and text is trimmed. */
    @Test
    public void fps() throws Exception {
        Scanner input = new Scanner("{1}{1}100\n{300}{400} trim me ");
        SubtitleElement subtitle = new MicroDVDReader(input).next();

        assertEquals(300 * 100, subtitle.getStart(), 0);
        assertEquals(400 * 100, subtitle.getEnd(), 0);
        assertEquals("trim me", subtitle.getText());
    }


    /** '|' is translated to line breaks; blank lines around the element are skipped. */
    @Test
    public void newline() throws Exception {
        Scanner input = new Scanner("\n\n{300}{400} l1|l2|l3| \n\n");
        String text = new MicroDVDReader(input).next().getText();

        String[] parts = text.split("\\n");

        assertEquals(3, parts.length);
        assertEquals("l1", parts[0]);
        assertEquals("l2", parts[1]);
        assertEquals("l3", parts[2]);
    }
}

View File

@ -0,0 +1,43 @@
package net.sourceforge.filebot.subtitle;
import static org.junit.Assert.*;
import java.io.InputStream;
import java.net.URL;
import java.util.LinkedList;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;
import org.junit.Test;
/**
 * Test for {@link SubRipReader} against a gzipped subtitle file.
 */
public class SubRipReaderTest {

    /**
     * Reads every element of the remote file and checks the element count, the interval of
     * the first element and the text of one element in the middle.
     */
    @Test
    public void parse() throws Exception {
        // NOTE(review): the input is downloaded on every run, so this test needs network
        // access and depends on the remote file staying unchanged
        URL location = new URL("http://www.opensubtitles.org/en/download/file/1951733951.gz");
        InputStream data = new GZIPInputStream(location.openStream());

        LinkedList<SubtitleElement> elements = new LinkedList<SubtitleElement>();
        SubRipReader subtitles = new SubRipReader(new Scanner(data, "UTF-8"));

        try {
            for (; subtitles.hasNext();) {
                elements.add(subtitles.next());
            }
        } finally {
            subtitles.close();
        }

        SubtitleElement head = elements.getFirst();

        assertEquals(499, elements.size(), 0);
        assertEquals(3455, head.getStart(), 0);
        assertEquals(6799, head.getEnd(), 0);
        assertEquals("Come with me if you want to live.", elements.get(253).getText());
    }
}

View File

@ -0,0 +1,14 @@
package net.sourceforge.filebot.subtitle;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses;
/**
 * JUnit suite that runs the subtitle reader tests listed in the SuiteClasses annotation.
 */
@RunWith(Suite.class)
@SuiteClasses( { SubRipReaderTest.class, MicroDVDReaderTest.class })
public class SubtitleReaderTestSuite {
}