+ added subtitle package and parsers for some formats
* added SubRip (.srt) support * added MicroDVD (.sub) support * added SubViewer (.sub) support * added SubStationAlpha (.ssa, .ass) support
This commit is contained in:
parent
5a2d1459f2
commit
3ded6a5628
|
@ -59,15 +59,15 @@ public class MediaTypes {
|
|||
|
||||
|
||||
public List<String> extensions(String name) {
|
||||
List<String> extensions = new ArrayList<String>();
|
||||
List<String> list = new ArrayList<String>();
|
||||
|
||||
for (Type type : types) {
|
||||
if (type.name.startsWith(name)) {
|
||||
addAll(extensions, type.extensions);
|
||||
addAll(list, type.extensions);
|
||||
}
|
||||
}
|
||||
|
||||
return extensions;
|
||||
return list;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -92,25 +92,34 @@
|
|||
<extension>flv</extension>
|
||||
</type>
|
||||
|
||||
<type name="video/rmvb">
|
||||
<extension>rmvb</extension>
|
||||
</type>
|
||||
|
||||
|
||||
<!--
|
||||
Subtitles
|
||||
-->
|
||||
<!-- Type names follow the SubtitleFormat enum constants, which look up their filter as "subtitle/" + constant name -->
<type name="subtitle/SubRip">
    <extension>srt</extension>
</type>

<type name="subtitle/MicroDVD">
    <extension>sub</extension>
</type>

<!-- SubViewer shares the .sub extension with MicroDVD -->
<type name="subtitle/SubViewer">
    <extension>sub</extension>
</type>

<type name="subtitle/SubStationAlpha">
    <extension>ssa</extension>
    <extension>ass</extension>
</type>

<type name="subtitle/SAMI">
    <extension>smi</extension>
    <extension>sami</extension>
</type>
|
||||
|
||||
</media-types>
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
||||
public class MicroDVDReader extends SubtitleReader {
|
||||
|
||||
private double fps = 23.976;
|
||||
|
||||
|
||||
public MicroDVDReader(Scanner scanner) {
|
||||
super(scanner);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SubtitleElement readNext() throws Exception {
|
||||
String line = scanner.nextLine();
|
||||
|
||||
List<String> properties = new ArrayList<String>(2);
|
||||
int from = 0;
|
||||
|
||||
while (from < line.length() && line.charAt(from) == '{') {
|
||||
int to = line.indexOf('}', from + 1);
|
||||
|
||||
// no more properties
|
||||
if (to < from)
|
||||
break;
|
||||
|
||||
// extract property
|
||||
properties.add(line.substring(from + 1, to));
|
||||
|
||||
// skip property
|
||||
from = to + 1;
|
||||
}
|
||||
|
||||
if (properties.size() < 2)
|
||||
return null;
|
||||
|
||||
long startFrame = Long.parseLong(properties.get(0));
|
||||
long endFrame = Long.parseLong(properties.get(1));
|
||||
String text = line.substring(from).trim();
|
||||
|
||||
if (startFrame == 1 && endFrame == 1) {
|
||||
// override fps
|
||||
fps = Double.parseDouble(text);
|
||||
|
||||
// ignore line
|
||||
return null;
|
||||
}
|
||||
|
||||
// translate '|' to new lines
|
||||
List<String> lines = Arrays.asList(text.split(Pattern.quote("|")));
|
||||
|
||||
// convert frame interval to time interval
|
||||
return new SubtitleElement(Math.round(startFrame * fps), Math.round(endFrame * fps), join(lines, "\n"));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Scanner;
|
||||
import java.util.TimeZone;
|
||||
|
||||
|
||||
public class SubRipReader extends SubtitleReader {
|
||||
|
||||
private final DateFormat timeFormat;
|
||||
|
||||
|
||||
public SubRipReader(Scanner scanner) {
|
||||
super(scanner);
|
||||
|
||||
// format used to parse time stamps (e.g. 00:02:26,407 --> 00:02:31,356)
|
||||
timeFormat = new SimpleDateFormat("HH:mm:ss,SSS", Locale.ROOT);
|
||||
timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected SubtitleElement readNext() throws Exception {
|
||||
String number = scanner.nextLine();
|
||||
|
||||
if (!number.matches("\\d+"))
|
||||
return null;
|
||||
|
||||
String[] interval = scanner.nextLine().split("-->", 2);
|
||||
|
||||
long t1 = timeFormat.parse(interval[0].trim()).getTime();
|
||||
long t2 = timeFormat.parse(interval[1].trim()).getTime();
|
||||
|
||||
List<String> lines = new ArrayList<String>(2);
|
||||
|
||||
// read text
|
||||
for (String line = scanner.nextLine(); !line.isEmpty() && scanner.hasNextLine(); line = scanner.nextLine()) {
|
||||
lines.add(line);
|
||||
}
|
||||
|
||||
return new SubtitleElement(t1, t2, join(lines, "\n"));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import java.text.DateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.InputMismatchException;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
||||
public class SubStationAlphaReader extends SubtitleReader {
|
||||
|
||||
private final DateFormat timeFormat = new SubtitleTimeFormat();
|
||||
|
||||
private Map<String, Integer> format;
|
||||
|
||||
|
||||
public SubStationAlphaReader(Scanner scanner) {
|
||||
super(scanner);
|
||||
}
|
||||
|
||||
|
||||
private void readFormat() throws Exception {
|
||||
// read format line (e.g. Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text)
|
||||
String[] event = scanner.nextLine().split(":", 2);
|
||||
|
||||
// sanity check
|
||||
if (!event[0].equals("Format"))
|
||||
throw new InputMismatchException("Illegal format header: " + Arrays.toString(event));
|
||||
|
||||
String[] columns = event[1].split(",");
|
||||
|
||||
// map column name to column index
|
||||
format = new HashMap<String, Integer>(columns.length);
|
||||
|
||||
for (int i = 0; i < columns.length; i++) {
|
||||
format.put(columns[i].trim(), i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SubtitleElement readNext() throws Exception {
|
||||
if (format == null) {
|
||||
// move to [Events] sections
|
||||
boolean found = false;
|
||||
|
||||
while (!found && scanner.hasNext()) {
|
||||
found = scanner.nextLine().equals("[Events]");
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
throw new InputMismatchException("Cannot find [Events] section");
|
||||
}
|
||||
|
||||
// read format header
|
||||
readFormat();
|
||||
}
|
||||
|
||||
// read next dialogue line
|
||||
String[] event = scanner.nextLine().split(":", 2);
|
||||
|
||||
// sanity check
|
||||
if (!event[0].equals("Dialogue"))
|
||||
throw new InputMismatchException("Illegal dialogue event: " + Arrays.toString(event));
|
||||
|
||||
// extract information
|
||||
String[] row = event[1].split(",", format.size());
|
||||
|
||||
long start = timeFormat.parse(row[format.get("Start")]).getTime();
|
||||
long end = timeFormat.parse(row[format.get("End")]).getTime();
|
||||
String[] lines = row[format.get("Text")].trim().split(Pattern.quote("\\n"));
|
||||
|
||||
return new SubtitleElement(start, end, join(Arrays.asList(lines), "\n"));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import java.text.DateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
||||
public class SubViewerReader extends SubtitleReader {
|
||||
|
||||
private final DateFormat timeFormat = new SubtitleTimeFormat();
|
||||
|
||||
|
||||
public SubViewerReader(Scanner scanner) {
|
||||
super(scanner);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected SubtitleElement readNext() throws Exception {
|
||||
// element starts with interval (e.g. 00:42:16.33,00:42:19.39)
|
||||
String[] interval = scanner.nextLine().split(",", 2);
|
||||
|
||||
if (interval.length < 2 || interval[0].startsWith("["))
|
||||
return null;
|
||||
|
||||
long t1 = timeFormat.parse(interval[0]).getTime();
|
||||
long t2 = timeFormat.parse(interval[1]).getTime();
|
||||
|
||||
// append subtitle line
|
||||
List<String> lines = new ArrayList<String>(2);
|
||||
|
||||
for (String text : scanner.nextLine().split(Pattern.quote("[br]"))) {
|
||||
lines.add(text);
|
||||
}
|
||||
|
||||
return new SubtitleElement(t1, t2, join(lines, "\n"));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
/**
 * Immutable value object for one subtitle: the interval it is shown for and its text.
 */
public class SubtitleElement {

    private final long start;
    private final long end;
    private final String text;


    /**
     * @param start beginning of the display interval
     * @param end end of the display interval
     * @param text subtitle text, stored as given
     */
    public SubtitleElement(long start, long end, String text) {
        this.start = start;
        this.end = end;
        this.text = text;
    }


    /** @return beginning of the display interval */
    public long getStart() {
        return start;
    }


    /** @return end of the display interval */
    public long getEnd() {
        return end;
    }


    /** @return subtitle text as passed to the constructor */
    public String getText() {
        return text;
    }


    /** @return the element formatted as <code>[start, end] text</code> */
    @Override
    public String toString() {
        return String.format("[%d, %d] %s", start, end, text);
    }

}
|
|
@ -0,0 +1,60 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import java.util.Scanner;
|
||||
|
||||
import net.sourceforge.filebot.MediaTypes;
|
||||
import net.sourceforge.tuned.FileUtilities.ExtensionFileFilter;
|
||||
|
||||
|
||||
public enum SubtitleFormat {
|
||||
|
||||
SubRip {
|
||||
|
||||
@Override
|
||||
public SubtitleReader newReader(Readable readable) {
|
||||
return new SubRipReader(new Scanner(readable));
|
||||
}
|
||||
},
|
||||
|
||||
MicroDVD {
|
||||
|
||||
@Override
|
||||
public SubtitleReader newReader(Readable readable) {
|
||||
return new MicroDVDReader(new Scanner(readable));
|
||||
}
|
||||
},
|
||||
|
||||
SubViewer {
|
||||
|
||||
@Override
|
||||
public SubtitleReader newReader(Readable readable) {
|
||||
return new SubViewerReader(new Scanner(readable));
|
||||
}
|
||||
},
|
||||
|
||||
SubStationAlpha {
|
||||
|
||||
@Override
|
||||
public SubtitleReader newReader(Readable readable) {
|
||||
return new SubStationAlphaReader(new Scanner(readable));
|
||||
}
|
||||
},
|
||||
|
||||
SAMI {
|
||||
|
||||
@Override
|
||||
public SubtitleReader newReader(Readable readable) {
|
||||
throw new UnsupportedOperationException("SAMI reader not implemented");
|
||||
}
|
||||
};
|
||||
|
||||
public abstract SubtitleReader newReader(Readable readable);
|
||||
|
||||
|
||||
public ExtensionFileFilter filter() {
|
||||
return MediaTypes.getDefault().filter("subtitle/" + this);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Scanner;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
|
||||
public abstract class SubtitleReader implements Iterator<SubtitleElement>, Closeable {
|
||||
|
||||
protected final Scanner scanner;
|
||||
|
||||
protected SubtitleElement current;
|
||||
|
||||
|
||||
public SubtitleReader(File file) throws FileNotFoundException {
|
||||
// don't use new Scanner(File) because of BUG 6368019 (http://bugs.sun.com/view_bug.do?bug_id=6368019)
|
||||
this(new Scanner(new FileInputStream(file), "UTF-8"));
|
||||
}
|
||||
|
||||
|
||||
public SubtitleReader(Scanner scanner) {
|
||||
this.scanner = scanner;
|
||||
}
|
||||
|
||||
|
||||
protected abstract SubtitleElement readNext() throws Exception;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
// find next element
|
||||
while (current == null && scanner.hasNextLine()) {
|
||||
try {
|
||||
current = readNext();
|
||||
} catch (Exception e) {
|
||||
// log and ignore
|
||||
Logger.getLogger(getClass().getName()).log(Level.WARNING, e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
return current != null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SubtitleElement next() {
|
||||
if (!hasNext()) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
|
||||
try {
|
||||
return current;
|
||||
} finally {
|
||||
current = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected String join(Iterable<?> values, String delimiter) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (Iterator<?> iterator = values.iterator(); iterator.hasNext();) {
|
||||
sb.append(iterator.next());
|
||||
|
||||
if (iterator.hasNext()) {
|
||||
sb.append(delimiter);
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
scanner.close();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import java.text.DateFormat;
|
||||
import java.text.FieldPosition;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
import java.util.Scanner;
|
||||
import java.util.TimeZone;
|
||||
|
||||
|
||||
/**
 * Time format of the form <code>hours:minutes:seconds.hundredths</code>
 * (e.g. <code>1:42:52.42</code>), evaluated against a UTC calendar.
 */
class SubtitleTimeFormat extends DateFormat {

    public SubtitleTimeFormat() {
        // calendar without any kind of special handling for time zone and daylight saving time
        calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);

        // DateFormat does not initialize numberFormat itself, but its equals(), hashCode()
        // and clone() dereference it, so leaving it null makes those methods throw
        numberFormat = java.text.NumberFormat.getIntegerInstance(Locale.ROOT);
    }


    /**
     * Appends the time of day of the given date as <code>HH:mm:ss.hh</code>; the
     * milliseconds are truncated to hundredths of a second.
     */
    @Override
    public StringBuffer format(Date date, StringBuffer sb, FieldPosition pos) {
        // e.g. 01:42:52.42
        calendar.setTime(date);

        // Locale.ROOT keeps the digits independent of the default locale
        sb.append(String.format(Locale.ROOT, "%02d", calendar.get(Calendar.HOUR_OF_DAY)));
        sb.append(':').append(String.format(Locale.ROOT, "%02d", calendar.get(Calendar.MINUTE)));
        sb.append(':').append(String.format(Locale.ROOT, "%02d", calendar.get(Calendar.SECOND)));

        String millis = String.format(Locale.ROOT, "%03d", calendar.get(Calendar.MILLISECOND));
        sb.append('.').append(millis.substring(0, 2));

        return sb;
    }


    /**
     * Parses <code>hours:minutes:seconds.hundredths</code>. Parsing always begins at the
     * start of the source; the start index in pos is not consulted.
     *
     * @return the parsed time, or null if the source does not consist of four integer
     *         fields, in which case the index of pos is left unchanged and its error
     *         index is set
     */
    @Override
    public Date parse(String source, ParsePosition pos) {
        Scanner scanner = new Scanner(source).useDelimiter(":|\\.");

        try {
            // reset state
            calendar.clear();

            // handle hours:minutes:seconds
            calendar.set(Calendar.HOUR_OF_DAY, scanner.nextInt());
            calendar.set(Calendar.MINUTE, scanner.nextInt());
            calendar.set(Calendar.SECOND, scanner.nextInt());

            // handle hundredth seconds
            calendar.set(Calendar.MILLISECOND, scanner.nextInt() * 10);

            // update position
            pos.setIndex(scanner.match().end());

            return calendar.getTime();
        } catch (java.util.NoSuchElementException e) {
            // missing or non-numeric field (InputMismatchException is a subclass): report the
            // failure through pos, which makes DateFormat.parse(String) throw ParseException
            pos.setErrorIndex(pos.getIndex());
            return null;
        }
    }
}
|
|
@ -2,19 +2,20 @@
|
|||
package net.sourceforge.filebot;
|
||||
|
||||
|
||||
import net.sourceforge.filebot.format.ExpressionFormatTest;
|
||||
import net.sourceforge.filebot.hash.VerificationFormatTest;
|
||||
import net.sourceforge.filebot.similarity.SimilarityTestSuite;
|
||||
import net.sourceforge.filebot.ui.panel.rename.MatchModelTest;
|
||||
import net.sourceforge.filebot.web.WebTestSuite;
|
||||
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Suite;
|
||||
import org.junit.runners.Suite.SuiteClasses;
|
||||
|
||||
import net.sourceforge.filebot.format.ExpressionFormatTest;
|
||||
import net.sourceforge.filebot.hash.VerificationFormatTest;
|
||||
import net.sourceforge.filebot.similarity.SimilarityTestSuite;
|
||||
import net.sourceforge.filebot.subtitle.SubtitleReaderTestSuite;
|
||||
import net.sourceforge.filebot.ui.panel.rename.MatchModelTest;
|
||||
import net.sourceforge.filebot.web.WebTestSuite;
|
||||
|
||||
|
||||
@RunWith(Suite.class)
|
||||
@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class })
|
||||
@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ArgumentBeanTest.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, SubtitleReaderTestSuite.class })
|
||||
public class FileBotTestSuite {
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.junit.*;
|
||||
|
||||
|
||||
public class MicroDVDReaderTest {
|
||||
|
||||
@Test
|
||||
public void parse() throws Exception {
|
||||
MicroDVDReader reader = new MicroDVDReader(new Scanner("{856}{900}what's the plan?"));
|
||||
|
||||
SubtitleElement element = reader.next();
|
||||
|
||||
assertEquals(856 * 23.976, element.getStart(), 1);
|
||||
assertEquals(900 * 23.976, element.getEnd(), 1);
|
||||
assertEquals("what's the plan?", element.getText());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void fps() throws Exception {
|
||||
MicroDVDReader reader = new MicroDVDReader(new Scanner("{1}{1}100\n{300}{400} trim me "));
|
||||
|
||||
SubtitleElement element = reader.next();
|
||||
|
||||
assertEquals(300 * 100, element.getStart(), 0);
|
||||
assertEquals(400 * 100, element.getEnd(), 0);
|
||||
assertEquals("trim me", element.getText());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void newline() throws Exception {
|
||||
MicroDVDReader reader = new MicroDVDReader(new Scanner("\n\n{300}{400} l1|l2|l3| \n\n"));
|
||||
|
||||
String[] lines = reader.next().getText().split("\\n");
|
||||
|
||||
assertEquals(3, lines.length);
|
||||
assertEquals("l1", lines[0]);
|
||||
assertEquals("l2", lines[1]);
|
||||
assertEquals("l3", lines[2]);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Scanner;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class SubRipReaderTest {
|
||||
|
||||
@Test
|
||||
public void parse() throws Exception {
|
||||
LinkedList<SubtitleElement> list = new LinkedList<SubtitleElement>();
|
||||
|
||||
URL resource = new URL("http://www.opensubtitles.org/en/download/file/1951733951.gz");
|
||||
InputStream stream = new GZIPInputStream(resource.openStream());
|
||||
|
||||
SubRipReader reader = new SubRipReader(new Scanner(stream, "UTF-8"));
|
||||
|
||||
try {
|
||||
while (reader.hasNext()) {
|
||||
list.add(reader.next());
|
||||
}
|
||||
} finally {
|
||||
reader.close();
|
||||
}
|
||||
|
||||
assertEquals(499, list.size(), 0);
|
||||
|
||||
assertEquals(3455, list.getFirst().getStart(), 0);
|
||||
assertEquals(6799, list.getFirst().getEnd(), 0);
|
||||
|
||||
assertEquals("Come with me if you want to live.", list.get(253).getText());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
package net.sourceforge.filebot.subtitle;
|
||||
|
||||
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Suite;
|
||||
import org.junit.runners.Suite.SuiteClasses;
|
||||
|
||||
|
||||
@RunWith(Suite.class)
|
||||
@SuiteClasses( { SubRipReaderTest.class, MicroDVDReaderTest.class })
|
||||
public class SubtitleReaderTestSuite {
|
||||
|
||||
}
|
Loading…
Reference in New Issue