From ae96a2a55c9c40b2a68ac346bd8dca3967b813a7 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Tue, 14 Feb 2017 02:33:21 +0800 Subject: [PATCH] Refactor SubtitleFormat and add SAMI support (read-only) --- .../net/filebot/subtitle/MicroDVDReader.java | 10 ++---- .../{SamiReader.java => SamiDecoder.java} | 5 +-- source/net/filebot/subtitle/SubRipReader.java | 10 ++---- .../subtitle/SubStationAlphaReader.java | 10 ++---- .../net/filebot/subtitle/SubViewerReader.java | 10 ++---- .../net/filebot/subtitle/SubtitleDecoder.java | 9 +++++ .../net/filebot/subtitle/SubtitleFormat.java | 35 ++++++++++++++----- .../net/filebot/subtitle/SubtitleReader.java | 19 +++++----- .../filebot/subtitle/SubtitleUtilities.java | 22 ++++-------- .../filebot/subtitle/MicroDVDReaderTest.java | 12 +++---- 10 files changed, 71 insertions(+), 71 deletions(-) rename source/net/filebot/subtitle/{SamiReader.java => SamiDecoder.java} (95%) create mode 100644 source/net/filebot/subtitle/SubtitleDecoder.java diff --git a/source/net/filebot/subtitle/MicroDVDReader.java b/source/net/filebot/subtitle/MicroDVDReader.java index ad243ba6..e63711c3 100644 --- a/source/net/filebot/subtitle/MicroDVDReader.java +++ b/source/net/filebot/subtitle/MicroDVDReader.java @@ -5,18 +5,14 @@ import static net.filebot.util.StringUtilities.*; import java.util.ArrayList; import java.util.List; +import java.util.Scanner; public class MicroDVDReader extends SubtitleReader { private double fps = 23.976; - public MicroDVDReader(Readable source) { - super(source); - } - - @Override - public String getFormatName() { - return "MicroDVD"; + public MicroDVDReader(Scanner scanner) { + super(scanner); } @Override diff --git a/source/net/filebot/subtitle/SamiReader.java b/source/net/filebot/subtitle/SamiDecoder.java similarity index 95% rename from source/net/filebot/subtitle/SamiReader.java rename to source/net/filebot/subtitle/SamiDecoder.java index 9bfa21a7..e84c4534 100644 --- a/source/net/filebot/subtitle/SamiReader.java +++ b/source/net/filebot/subtitle/SamiDecoder.java @@ -13,9 +13,10 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -public class SamiReader { +public class SamiDecoder implements SubtitleDecoder { - public List decode(CharSequence file) { + @Override + public List decode(String file) { List subtitles = new ArrayList(); Matcher matcher = Pattern.compile("", Pattern.CASE_INSENSITIVE).matcher(file); diff --git a/source/net/filebot/subtitle/SubRipReader.java b/source/net/filebot/subtitle/SubRipReader.java index 05d34f82..1ccae30b 100644 --- a/source/net/filebot/subtitle/SubRipReader.java +++ b/source/net/filebot/subtitle/SubRipReader.java @@ -7,6 +7,7 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.List; import java.util.Locale; +import java.util.Scanner; import java.util.TimeZone; import java.util.regex.Pattern; @@ -15,8 +16,8 @@ public class SubRipReader extends SubtitleReader { private final DateFormat timeFormat; private final Pattern tag; - public SubRipReader(Readable source) { - super(source); + public SubRipReader(Scanner scanner) { + super(scanner); // format used to parse time stamps (e.g. 00:02:26,407 --> 00:02:31,356) timeFormat = new SimpleDateFormat("HH:mm:ss,SSS", Locale.ROOT); @@ -26,11 +27,6 @@ public class SubRipReader extends SubtitleReader { tag = Pattern.compile("]*)>", Pattern.CASE_INSENSITIVE); } - @Override - public String getFormatName() { - return "SubRip"; - } - @Override protected SubtitleElement readNext() throws Exception { String number = scanner.nextLine(); diff --git a/source/net/filebot/subtitle/SubStationAlphaReader.java b/source/net/filebot/subtitle/SubStationAlphaReader.java index e4120dc6..3ac80077 100644 --- a/source/net/filebot/subtitle/SubStationAlphaReader.java +++ b/source/net/filebot/subtitle/SubStationAlphaReader.java @@ -6,6 +6,7 @@ import static java.util.Arrays.*; import java.text.DateFormat; import java.util.InputMismatchException; import java.util.List; +import java.util.Scanner; import java.util.regex.Pattern; public class SubStationAlphaReader extends SubtitleReader { @@ -20,13 +21,8 @@ public class SubStationAlphaReader extends SubtitleReader { private int formatIndexEnd; private int formatIndexText; - public SubStationAlphaReader(Readable source) { - super(source); - } - - @Override - public String getFormatName() { - return "SubStationAlpha"; + public SubStationAlphaReader(Scanner scanner) { + super(scanner); } private void readFormat() throws Exception { diff --git a/source/net/filebot/subtitle/SubViewerReader.java b/source/net/filebot/subtitle/SubViewerReader.java index 18ddc300..b6609588 100644 --- a/source/net/filebot/subtitle/SubViewerReader.java +++ b/source/net/filebot/subtitle/SubViewerReader.java @@ -7,6 +7,7 @@ import static net.filebot.util.StringUtilities.*; import java.text.DateFormat; import java.text.ParseException; import java.util.InputMismatchException; +import java.util.Scanner; import java.util.regex.Pattern; public class SubViewerReader extends SubtitleReader { @@ -14,13 +15,8 @@ public class SubViewerReader extends SubtitleReader { private final DateFormat timeFormat = new SubtitleTimeFormat(); private final Pattern newline = compile(quote("[br]"), CASE_INSENSITIVE); - public SubViewerReader(Readable source) { - super(source); - } - - @Override - public String getFormatName() { - return "SubViewer"; + public SubViewerReader(Scanner scanner) { + super(scanner); } @Override diff --git a/source/net/filebot/subtitle/SubtitleDecoder.java b/source/net/filebot/subtitle/SubtitleDecoder.java new file mode 100644 index 00000000..42e7cae6 --- /dev/null +++ b/source/net/filebot/subtitle/SubtitleDecoder.java @@ -0,0 +1,9 @@ +package net.filebot.subtitle; + +import java.util.List; + +public interface SubtitleDecoder { + + List decode(String file); + +} \ No newline at end of file diff --git a/source/net/filebot/subtitle/SubtitleFormat.java b/source/net/filebot/subtitle/SubtitleFormat.java index 3e4fca5f..69bbf4b6 100644 --- a/source/net/filebot/subtitle/SubtitleFormat.java +++ b/source/net/filebot/subtitle/SubtitleFormat.java @@ -1,6 +1,10 @@ package net.filebot.subtitle; +import static java.util.stream.Collectors.*; + +import java.util.Scanner; + import net.filebot.MediaTypes; import net.filebot.util.FileUtilities.ExtensionFileFilter; @@ -9,8 +13,8 @@ public enum SubtitleFormat { SubRip { @Override - public SubtitleReader newReader(Readable readable) { - return new SubRipReader(readable); + public SubtitleDecoder getDecoder() { + return content -> new SubRipReader(new Scanner(content)).stream().collect(toList()); } @Override @@ -22,8 +26,8 @@ public enum SubtitleFormat { MicroDVD { @Override - public SubtitleReader newReader(Readable readable) { - return new MicroDVDReader(readable); + public SubtitleDecoder getDecoder() { + return content -> new MicroDVDReader(new Scanner(content)).stream().collect(toList()); } @Override @@ -35,8 +39,8 @@ public enum SubtitleFormat { SubViewer { @Override - public SubtitleReader newReader(Readable readable) { - return new SubViewerReader(readable); + public SubtitleDecoder getDecoder() { + return content -> new SubViewerReader(new Scanner(content)).stream().collect(toList()); } @Override @@ -48,17 +52,30 @@ public enum SubtitleFormat { SubStationAlpha { @Override - public SubtitleReader newReader(Readable readable) { - return new SubStationAlphaReader(readable); + public SubtitleDecoder getDecoder() { + return content -> new SubStationAlphaReader(new Scanner(content)).stream().collect(toList()); } @Override public ExtensionFileFilter getFilter() { return MediaTypes.getTypeFilter("subtitle/SubStationAlpha"); } + }, + + SAMI { + + @Override + public SubtitleDecoder getDecoder() { + return new SamiDecoder(); + } + + @Override + public ExtensionFileFilter getFilter() { + return MediaTypes.getTypeFilter("subtitle/SAMI"); + } }; - public abstract SubtitleReader newReader(Readable readable); + public abstract SubtitleDecoder getDecoder(); public abstract ExtensionFileFilter getFilter(); diff --git a/source/net/filebot/subtitle/SubtitleReader.java b/source/net/filebot/subtitle/SubtitleReader.java index b3bd8b16..ee4b3802 100644 --- a/source/net/filebot/subtitle/SubtitleReader.java +++ b/source/net/filebot/subtitle/SubtitleReader.java @@ -7,18 +7,20 @@ import java.io.IOException; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.Scanner; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; public abstract class SubtitleReader implements Iterator, Closeable { - protected final Scanner scanner; + protected Scanner scanner; protected SubtitleElement current; - public SubtitleReader(Readable source) { - this.scanner = new Scanner(source); + public SubtitleReader(Scanner scanner) { + this.scanner = scanner; } - public abstract String getFormatName(); - protected abstract SubtitleElement readNext() throws Exception; @Override @@ -28,7 +30,7 @@ public abstract class SubtitleReader implements Iterator, Close try { current = readNext(); } catch (Exception e) { - debug.warning(format("%s: %s", getFormatName(), e.getMessage())); // log and ignore + debug.warning(cause(e)); // log and ignore } } @@ -53,9 +55,8 @@ public abstract class SubtitleReader implements Iterator, Close scanner.close(); } - @Override - public void remove() { - throw new UnsupportedOperationException(); + public Stream stream() { + return StreamSupport.stream(Spliterators.spliteratorUnknownSize(this, Spliterator.ORDERED), false); } } diff --git a/source/net/filebot/subtitle/SubtitleUtilities.java b/source/net/filebot/subtitle/SubtitleUtilities.java index b18061c3..da5d7a2b 100644 --- a/source/net/filebot/subtitle/SubtitleUtilities.java +++ b/source/net/filebot/subtitle/SubtitleUtilities.java @@ -324,24 +324,16 @@ public final class SubtitleUtilities { likelyFormats.addLast(format); } + // decode bytes and beware of byte-order marks + Reader reader = createTextReader(new ByteBufferInputStream(file.getData()), true, UTF_8); + String content = IOUtils.toString(reader); + // decode subtitle file with the first reader that seems to work for (SubtitleFormat format : likelyFormats) { - // decode bytes and beware of byte-order marks - Reader reader = createTextReader(new ByteBufferInputStream(file.getData()), true, UTF_8); + List subtitles = format.getDecoder().decode(content); - // reset reader to position 0 - SubtitleReader parser = format.newReader(reader); - - if (parser.hasNext()) { - // correct format found - List list = new ArrayList(500); - - // read subtitle file - while (parser.hasNext()) { - list.add(parser.next()); - } - - return list; + if (subtitles.size() > 0) { + return subtitles; } } diff --git a/test/net/filebot/subtitle/MicroDVDReaderTest.java b/test/net/filebot/subtitle/MicroDVDReaderTest.java index a3045269..15090458 100644 --- a/test/net/filebot/subtitle/MicroDVDReaderTest.java +++ b/test/net/filebot/subtitle/MicroDVDReaderTest.java @@ -1,19 +1,17 @@ package net.filebot.subtitle; - import static org.junit.Assert.*; -import java.io.StringReader; +import java.util.Scanner; import org.junit.Test; - public class MicroDVDReaderTest { @Test public void parse() throws Exception { - MicroDVDReader reader = new MicroDVDReader(new StringReader("{856}{900}what's the plan?")); + MicroDVDReader reader = new MicroDVDReader(new Scanner("{856}{900}what's the plan?")); SubtitleElement element = reader.next(); @@ -22,10 +20,9 @@ public class MicroDVDReaderTest { assertEquals("what's the plan?", element.getText()); } - @Test public void fps() throws Exception { - MicroDVDReader reader = new MicroDVDReader(new StringReader("{1}{1}100\n{300}{400} trim me ")); + MicroDVDReader reader = new MicroDVDReader(new Scanner("{1}{1}100\n{300}{400} trim me ")); SubtitleElement element = reader.next(); @@ -34,10 +31,9 @@ public class MicroDVDReaderTest { assertEquals("trim me", element.getText()); } - @Test public void newline() throws Exception { - MicroDVDReader reader = new MicroDVDReader(new StringReader("\n\n{300}{400} l1|l2|l3| \n\n")); + MicroDVDReader reader = new MicroDVDReader(new Scanner("\n\n{300}{400} l1|l2|l3| \n\n")); String[] lines = reader.next().getText().split("\\n");