Refactor SubtitleFormat and add SAMI support (read-only)

This commit is contained in:
Reinhard Pointner 2017-02-14 02:33:21 +08:00
parent 3ac78751b6
commit ae96a2a55c
10 changed files with 71 additions and 71 deletions

View File

@ -5,18 +5,14 @@ import static net.filebot.util.StringUtilities.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Scanner;
public class MicroDVDReader extends SubtitleReader { public class MicroDVDReader extends SubtitleReader {
private double fps = 23.976; private double fps = 23.976;
public MicroDVDReader(Readable source) { public MicroDVDReader(Scanner scanner) {
super(source); super(scanner);
}
@Override
public String getFormatName() {
return "MicroDVD";
} }
@Override @Override

View File

@ -13,9 +13,10 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
public class SamiReader { public class SamiDecoder implements SubtitleDecoder {
public List<SubtitleElement> decode(CharSequence file) { @Override
public List<SubtitleElement> decode(String file) {
List<SubtitleElement> subtitles = new ArrayList<SubtitleElement>(); List<SubtitleElement> subtitles = new ArrayList<SubtitleElement>();
Matcher matcher = Pattern.compile("<SYNC(.*?)>", Pattern.CASE_INSENSITIVE).matcher(file); Matcher matcher = Pattern.compile("<SYNC(.*?)>", Pattern.CASE_INSENSITIVE).matcher(file);

View File

@ -7,6 +7,7 @@ import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Scanner;
import java.util.TimeZone; import java.util.TimeZone;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -15,8 +16,8 @@ public class SubRipReader extends SubtitleReader {
private final DateFormat timeFormat; private final DateFormat timeFormat;
private final Pattern tag; private final Pattern tag;
public SubRipReader(Readable source) { public SubRipReader(Scanner scanner) {
super(source); super(scanner);
// format used to parse time stamps (e.g. 00:02:26,407 --> 00:02:31,356) // format used to parse time stamps (e.g. 00:02:26,407 --> 00:02:31,356)
timeFormat = new SimpleDateFormat("HH:mm:ss,SSS", Locale.ROOT); timeFormat = new SimpleDateFormat("HH:mm:ss,SSS", Locale.ROOT);
@ -26,11 +27,6 @@ public class SubRipReader extends SubtitleReader {
tag = Pattern.compile("</?(b|u|i|font[^<>]*)>", Pattern.CASE_INSENSITIVE); tag = Pattern.compile("</?(b|u|i|font[^<>]*)>", Pattern.CASE_INSENSITIVE);
} }
@Override
public String getFormatName() {
return "SubRip";
}
@Override @Override
protected SubtitleElement readNext() throws Exception { protected SubtitleElement readNext() throws Exception {
String number = scanner.nextLine(); String number = scanner.nextLine();

View File

@ -6,6 +6,7 @@ import static java.util.Arrays.*;
import java.text.DateFormat; import java.text.DateFormat;
import java.util.InputMismatchException; import java.util.InputMismatchException;
import java.util.List; import java.util.List;
import java.util.Scanner;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class SubStationAlphaReader extends SubtitleReader { public class SubStationAlphaReader extends SubtitleReader {
@ -20,13 +21,8 @@ public class SubStationAlphaReader extends SubtitleReader {
private int formatIndexEnd; private int formatIndexEnd;
private int formatIndexText; private int formatIndexText;
public SubStationAlphaReader(Readable source) { public SubStationAlphaReader(Scanner scanner) {
super(source); super(scanner);
}
@Override
public String getFormatName() {
return "SubStationAlpha";
} }
private void readFormat() throws Exception { private void readFormat() throws Exception {

View File

@ -7,6 +7,7 @@ import static net.filebot.util.StringUtilities.*;
import java.text.DateFormat; import java.text.DateFormat;
import java.text.ParseException; import java.text.ParseException;
import java.util.InputMismatchException; import java.util.InputMismatchException;
import java.util.Scanner;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class SubViewerReader extends SubtitleReader { public class SubViewerReader extends SubtitleReader {
@ -14,13 +15,8 @@ public class SubViewerReader extends SubtitleReader {
private final DateFormat timeFormat = new SubtitleTimeFormat(); private final DateFormat timeFormat = new SubtitleTimeFormat();
private final Pattern newline = compile(quote("[br]"), CASE_INSENSITIVE); private final Pattern newline = compile(quote("[br]"), CASE_INSENSITIVE);
public SubViewerReader(Readable source) { public SubViewerReader(Scanner scanner) {
super(source); super(scanner);
}
@Override
public String getFormatName() {
return "SubViewer";
} }
@Override @Override

View File

@ -0,0 +1,9 @@
package net.filebot.subtitle;
import java.util.List;
/**
 * Turns the text of a subtitle file into a list of {@link SubtitleElement}s.
 *
 * <p>Each {@code SubtitleFormat} constant hands out one implementation via
 * {@code getDecoder()}: the Scanner-based readers are wrapped in a lambda,
 * while SAMI uses the dedicated {@code SamiDecoder} class.
 */
public interface SubtitleDecoder {
/**
 * Decodes the given subtitle text.
 *
 * @param file the subtitle content itself (the callers visible in this change
 *             pass the already-decoded file text, not a file name or path)
 * @return the decoded subtitle elements; the format-detection loop in
 *         {@code SubtitleUtilities} treats an empty list as "this format did
 *         not match" and moves on to the next candidate format
 */
List<SubtitleElement> decode(String file);
}

View File

@ -1,6 +1,10 @@
package net.filebot.subtitle; package net.filebot.subtitle;
import static java.util.stream.Collectors.*;
import java.util.Scanner;
import net.filebot.MediaTypes; import net.filebot.MediaTypes;
import net.filebot.util.FileUtilities.ExtensionFileFilter; import net.filebot.util.FileUtilities.ExtensionFileFilter;
@ -9,8 +13,8 @@ public enum SubtitleFormat {
SubRip { SubRip {
@Override @Override
public SubtitleReader newReader(Readable readable) { public SubtitleDecoder getDecoder() {
return new SubRipReader(readable); return content -> new SubRipReader(new Scanner(content)).stream().collect(toList());
} }
@Override @Override
@ -22,8 +26,8 @@ public enum SubtitleFormat {
MicroDVD { MicroDVD {
@Override @Override
public SubtitleReader newReader(Readable readable) { public SubtitleDecoder getDecoder() {
return new MicroDVDReader(readable); return content -> new MicroDVDReader(new Scanner(content)).stream().collect(toList());
} }
@Override @Override
@ -35,8 +39,8 @@ public enum SubtitleFormat {
SubViewer { SubViewer {
@Override @Override
public SubtitleReader newReader(Readable readable) { public SubtitleDecoder getDecoder() {
return new SubViewerReader(readable); return content -> new SubViewerReader(new Scanner(content)).stream().collect(toList());
} }
@Override @Override
@ -48,17 +52,30 @@ public enum SubtitleFormat {
SubStationAlpha { SubStationAlpha {
@Override @Override
public SubtitleReader newReader(Readable readable) { public SubtitleDecoder getDecoder() {
return new SubStationAlphaReader(readable); return content -> new SubStationAlphaReader(new Scanner(content)).stream().collect(toList());
} }
@Override @Override
public ExtensionFileFilter getFilter() { public ExtensionFileFilter getFilter() {
return MediaTypes.getTypeFilter("subtitle/SubStationAlpha"); return MediaTypes.getTypeFilter("subtitle/SubStationAlpha");
} }
},
SAMI {
@Override
public SubtitleDecoder getDecoder() {
return new SamiDecoder();
}
@Override
public ExtensionFileFilter getFilter() {
return MediaTypes.getTypeFilter("subtitle/SAMI");
}
}; };
public abstract SubtitleReader newReader(Readable readable); public abstract SubtitleDecoder getDecoder();
public abstract ExtensionFileFilter getFilter(); public abstract ExtensionFileFilter getFilter();

View File

@ -7,18 +7,20 @@ import java.io.IOException;
import java.util.Iterator; import java.util.Iterator;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.Scanner; import java.util.Scanner;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
public abstract class SubtitleReader implements Iterator<SubtitleElement>, Closeable { public abstract class SubtitleReader implements Iterator<SubtitleElement>, Closeable {
protected final Scanner scanner; protected Scanner scanner;
protected SubtitleElement current; protected SubtitleElement current;
public SubtitleReader(Readable source) { public SubtitleReader(Scanner scanner) {
this.scanner = new Scanner(source); this.scanner = scanner;
} }
public abstract String getFormatName();
protected abstract SubtitleElement readNext() throws Exception; protected abstract SubtitleElement readNext() throws Exception;
@Override @Override
@ -28,7 +30,7 @@ public abstract class SubtitleReader implements Iterator<SubtitleElement>, Close
try { try {
current = readNext(); current = readNext();
} catch (Exception e) { } catch (Exception e) {
debug.warning(format("%s: %s", getFormatName(), e.getMessage())); // log and ignore debug.warning(cause(e)); // log and ignore
} }
} }
@ -53,9 +55,8 @@ public abstract class SubtitleReader implements Iterator<SubtitleElement>, Close
scanner.close(); scanner.close();
} }
@Override public Stream<SubtitleElement> stream() {
public void remove() { return StreamSupport.stream(Spliterators.spliteratorUnknownSize(this, Spliterator.ORDERED), false);
throw new UnsupportedOperationException();
} }
} }

View File

@ -324,24 +324,16 @@ public final class SubtitleUtilities {
likelyFormats.addLast(format); likelyFormats.addLast(format);
} }
// decode subtitle file with the first reader that seems to work
for (SubtitleFormat format : likelyFormats) {
// decode bytes and beware of byte-order marks // decode bytes and beware of byte-order marks
Reader reader = createTextReader(new ByteBufferInputStream(file.getData()), true, UTF_8); Reader reader = createTextReader(new ByteBufferInputStream(file.getData()), true, UTF_8);
String content = IOUtils.toString(reader);
// reset reader to position 0 // decode subtitle file with the first reader that seems to work
SubtitleReader parser = format.newReader(reader); for (SubtitleFormat format : likelyFormats) {
List<SubtitleElement> subtitles = format.getDecoder().decode(content);
if (parser.hasNext()) { if (subtitles.size() > 0) {
// correct format found return subtitles;
List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);
// read subtitle file
while (parser.hasNext()) {
list.add(parser.next());
}
return list;
} }
} }

View File

@ -1,19 +1,17 @@
package net.filebot.subtitle; package net.filebot.subtitle;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import java.io.StringReader; import java.util.Scanner;
import org.junit.Test; import org.junit.Test;
public class MicroDVDReaderTest { public class MicroDVDReaderTest {
@Test @Test
public void parse() throws Exception { public void parse() throws Exception {
MicroDVDReader reader = new MicroDVDReader(new StringReader("{856}{900}what's the plan?")); MicroDVDReader reader = new MicroDVDReader(new Scanner("{856}{900}what's the plan?"));
SubtitleElement element = reader.next(); SubtitleElement element = reader.next();
@ -22,10 +20,9 @@ public class MicroDVDReaderTest {
assertEquals("what's the plan?", element.getText()); assertEquals("what's the plan?", element.getText());
} }
@Test @Test
public void fps() throws Exception { public void fps() throws Exception {
MicroDVDReader reader = new MicroDVDReader(new StringReader("{1}{1}100\n{300}{400} trim me ")); MicroDVDReader reader = new MicroDVDReader(new Scanner("{1}{1}100\n{300}{400} trim me "));
SubtitleElement element = reader.next(); SubtitleElement element = reader.next();
@ -34,10 +31,9 @@ public class MicroDVDReaderTest {
assertEquals("trim me", element.getText()); assertEquals("trim me", element.getText());
} }
@Test @Test
public void newline() throws Exception { public void newline() throws Exception {
MicroDVDReader reader = new MicroDVDReader(new StringReader("\n\n{300}{400} l1|l2|l3| \n\n")); MicroDVDReader reader = new MicroDVDReader(new Scanner("\n\n{300}{400} l1|l2|l3| \n\n"));
String[] lines = reader.next().getText().split("\\n"); String[] lines = reader.next().getText().split("\\n");