diff --git a/source/net/sourceforge/filebot/cli/ArgumentBean.java b/source/net/sourceforge/filebot/cli/ArgumentBean.java index dbf8a835..062edecc 100644 --- a/source/net/sourceforge/filebot/cli/ArgumentBean.java +++ b/source/net/sourceforge/filebot/cli/ArgumentBean.java @@ -7,6 +7,7 @@ import static net.sourceforge.tuned.FileUtilities.*; import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; @@ -26,11 +27,11 @@ public class ArgumentBean { @Option(name = "-rename", usage = "Rename episode/movie files", metaVar = "fileset") public boolean rename = false; - @Option(name = "--db", usage = "Episode/Movie database", metaVar = "[TVRage, AniDB, TheTVDB] or [OpenSubtitles, TheMovieDB]") - public String db = null; + @Option(name = "--db", usage = "Episode/Movie database", metaVar = "TVRage, AniDB, TheTVDB, OpenSubtitles, TheMovieDB") + public String db; @Option(name = "--format", usage = "Episode naming scheme", metaVar = "expression") - public String format = "{n} - {s+'x'}{e.pad(2)} - {t}"; + public String format; @Option(name = "-non-strict", usage = "Use less strict matching") public boolean nonStrict = false; @@ -39,7 +40,7 @@ public class ArgumentBean { public boolean getSubtitles; @Option(name = "--q", usage = "Search query", metaVar = "title") - public String query = null; + public String query; @Option(name = "--lang", usage = "Language", metaVar = "2-letter language code") public String lang = "en"; @@ -47,10 +48,13 @@ public class ArgumentBean { @Option(name = "-check", usage = "Create/Check verification file", metaVar = "fileset") public boolean check; - @Option(name = "--output", usage = "Output options", metaVar = "[sfv, md5, sha1]") - public String output = "sfv"; + @Option(name = "--output", usage = "Output options", metaVar = "sfv, md5, sha1") + public String output; - @Option(name = "--log", usage = "Log level", metaVar = "[all, config, info, warning]") + @Option(name = "--encoding", usage = "Character encoding", metaVar = "UTF-8, windows-1252, GB18030") + public String encoding; + + @Option(name = "--log", usage = "Log level", metaVar = "all, config, info, warning") public String log = "all"; @Option(name = "-help", usage = "Print this help message") @@ -87,7 +91,7 @@ public class ArgumentBean { public ExpressionFormat getEpisodeFormat() throws ScriptException { - return new ExpressionFormat(format); + return format != null ? new ExpressionFormat(format) : null; } @@ -101,6 +105,11 @@ public class ArgumentBean { } + public Charset getEncoding() { + return encoding != null ? Charset.forName(encoding) : null; + } + + public Level getLogLevel() { return Level.parse(log.toUpperCase()); } diff --git a/source/net/sourceforge/filebot/cli/ArgumentProcessor.java b/source/net/sourceforge/filebot/cli/ArgumentProcessor.java index d1b6d456..4157a8bf 100644 --- a/source/net/sourceforge/filebot/cli/ArgumentProcessor.java +++ b/source/net/sourceforge/filebot/cli/ArgumentProcessor.java @@ -7,11 +7,13 @@ import static net.sourceforge.filebot.MediaTypes.*; import static net.sourceforge.filebot.WebServices.*; import static net.sourceforge.filebot.cli.CLILogging.*; import static net.sourceforge.filebot.hash.VerificationUtilities.*; +import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*; import static net.sourceforge.tuned.FileUtilities.*; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; @@ -37,12 +39,14 @@ import net.sourceforge.filebot.similarity.Matcher; import net.sourceforge.filebot.similarity.NameSimilarityMetric; import net.sourceforge.filebot.similarity.SeriesNameMatcher; import net.sourceforge.filebot.similarity.SimilarityMetric; +import net.sourceforge.filebot.subtitle.SubtitleFormat; import net.sourceforge.filebot.ui.Language; import net.sourceforge.filebot.ui.panel.rename.HistorySpooler; import net.sourceforge.filebot.ui.panel.rename.MatchSimilarityMetric; import net.sourceforge.filebot.vfs.ArchiveType; import net.sourceforge.filebot.vfs.MemoryFile; import net.sourceforge.filebot.web.Episode; +import net.sourceforge.filebot.web.EpisodeFormat; import net.sourceforge.filebot.web.EpisodeListProvider; import net.sourceforge.filebot.web.MovieDescriptor; import net.sourceforge.filebot.web.MovieIdentificationService; @@ -60,7 +64,7 @@ public class ArgumentProcessor { Set files = new LinkedHashSet(args.getFiles(true)); if (args.getSubtitles) { - List subtitles = getSubtitles(files, args.query, args.getLanguage()); + List subtitles = getSubtitles(files, args.query, args.getLanguage(), args.output, args.getEncoding()); files.addAll(subtitles); } @@ -69,7 +73,7 @@ public class ArgumentProcessor { } if (args.check) { - check(files, args.output); + check(files, args.output, args.getEncoding()); } CLILogger.finest("Done ヾ(@⌒ー⌒@)ノ"); @@ -168,7 +172,8 @@ public class ArgumentProcessor { for (Match match : matches) { File file = match.getValue(); - String newName = format.format(new EpisodeBindingBean(match.getCandidate(), file)); + Episode episode = match.getCandidate(); + String newName = (format != null) ? format.format(new EpisodeBindingBean(episode, file)) : EpisodeFormat.SeasonEpisode.format(episode); if (isInvalidFileName(newName)) { CLILogger.config("Stripping invalid characters from new name: " + newName); @@ -234,7 +239,7 @@ public class ArgumentProcessor { } - public List getSubtitles(Collection files, String query, Language language) throws Exception { + public List getSubtitles(Collection files, String query, Language language, String output, Charset outputEncoding) throws Exception { // match movie hashes online Set videos = new TreeSet(filter(files, VIDEO_FILES)); List downloadedSubtitles = new ArrayList(); @@ -243,6 +248,17 @@ public class ArgumentProcessor { throw new IllegalArgumentException("No video files: " + files); } + SubtitleFormat outputFormat = null; + if (output != null) { + outputFormat = getSubtitleFormatByName(output); + + // when rewriting subtitles to target format an encoding must be defined, default to UTF-8 + if (outputEncoding == null) + outputEncoding = Charset.forName("UTF-8"); + + CLILogger.config(format("Export as: %s (%s)", outputFormat, outputEncoding.displayName(Locale.ROOT))); + } + // lookup subtitles by hash for (VideoHashSubtitleService service : WebServices.getVideoHashSubtitleServices()) { if (videos.isEmpty()) @@ -253,7 +269,7 @@ public class ArgumentProcessor { for (Entry> it : service.getSubtitleList(videos.toArray(new File[0]), language.getName()).entrySet()) { if (it.getValue() != null && it.getValue().size() > 0) { // auto-select first element if there are multiple hash matches for the same video files - File subtitle = fetchSubtitle(it.getValue().get(0), it.getKey()); + File subtitle = fetchSubtitle(it.getValue().get(0), it.getKey(), outputFormat, outputEncoding); // download complete, cross this video off the list videos.remove(it.getKey()); @@ -277,7 +293,7 @@ public class ArgumentProcessor { for (SubtitleDescriptor descriptor : subtitles) { if (filename.equalsIgnoreCase(descriptor.getName())) { - File subtitle = fetchSubtitle(descriptor, video); + File subtitle = fetchSubtitle(descriptor, video, outputFormat, outputEncoding); // download complete, cross this video off the list videos.remove(video); @@ -300,30 +316,37 @@ public class ArgumentProcessor { } - private File fetchSubtitle(SubtitleDescriptor descriptor, File movieFile) throws Exception { + private File fetchSubtitle(SubtitleDescriptor descriptor, File movieFile, SubtitleFormat outputFormat, Charset outputEncoding) throws Exception { // fetch subtitle archive CLILogger.info(format("Fetching [%s.%s]", descriptor.getName(), descriptor.getType())); ByteBuffer downloadedData = descriptor.fetch(); // extract subtitles from archive ArchiveType type = ArchiveType.forName(descriptor.getType()); - MemoryFile subtitleData; + MemoryFile subtitleFile; if (type != ArchiveType.UNDEFINED) { // extract subtitle from archive - subtitleData = type.fromData(downloadedData).iterator().next(); + subtitleFile = type.fromData(downloadedData).iterator().next(); } else { // assume that the fetched data is the subtitle - subtitleData = new MemoryFile(descriptor.getName() + "." + descriptor.getType(), downloadedData); + subtitleFile = new MemoryFile(descriptor.getName() + "." + descriptor.getType(), downloadedData); } // subtitle filename is based on movie filename - String subtitleFileName = getNameWithoutExtension(movieFile.getName()) + "." + getExtension(subtitleData.getName()); - File destination = new File(movieFile.getParentFile(), validateFileName(subtitleFileName)); + String name = getName(movieFile); + String ext = getExtension(subtitleFile.getName()); + ByteBuffer data = subtitleFile.getData(); - CLILogger.config(format("Writing [%s] to [%s]", subtitleData.getName(), destination.getName())); - writeFile(subtitleData.getData(), destination); + if (outputFormat != null || outputEncoding != null) { + ext = outputFormat.getFilter().extension(); // adjust extension of the output file + data = exportSubtitles(subtitleFile, outputFormat, 0, outputEncoding); + } + File destination = new File(movieFile.getParentFile(), name + "." + ext); + CLILogger.config(format("Writing [%s] to [%s]", subtitleFile.getName(), destination.getName())); + + writeFile(data, destination); return destination; } @@ -435,7 +458,7 @@ public class ArgumentProcessor { } - public void check(Collection files, String output) throws Exception { + public void check(Collection files, String output, Charset outputEncoding) throws Exception { // check verification file if (containsOnly(files, MediaTypes.getDefaultFilter("verification"))) { // only check existing hashes @@ -474,19 +497,19 @@ public class ArgumentProcessor { } else { // auto-select the filename based on folder and type hashType = (output != null) ? getHashTypeByExtension(output) : HashType.SFV; - outputFile = new File(root, root.getName() + "." + hashType.getFilter().extensions()[0]); + outputFile = new File(root, root.getName() + "." + hashType.getFilter().extension()); } - CLILogger.config("Using output file: " + outputFile); if (hashType == null) { throw new IllegalArgumentException("Illegal output type: " + output); } - compute(root.getPath(), files, outputFile, hashType); + CLILogger.config("Using output file: " + outputFile); + compute(root.getPath(), files, outputFile, hashType, outputEncoding); } - public boolean check(File verificationFile, File parent) throws Exception { + private boolean check(File verificationFile, File root) throws Exception { HashType type = getHashType(verificationFile); // check if type is supported @@ -503,8 +526,8 @@ public class ArgumentProcessor { try { Entry it = parser.next(); - File file = new File(parent, it.getKey().getPath()).getAbsoluteFile(); - String current = computeHash(new File(parent, it.getKey().getPath()), type); + File file = new File(root, it.getKey().getPath()).getAbsoluteFile(); + String current = computeHash(new File(root, it.getKey().getPath()), type); CLILogger.info(format("%s %s", current, file)); if (current.compareToIgnoreCase(it.getValue()) != 0) { @@ -523,9 +546,9 @@ public class ArgumentProcessor { } - private void compute(String root, Collection files, File outputFile, HashType hashType) throws IOException, Exception { + private void compute(String root, Collection files, File outputFile, HashType hashType, Charset outputEncoding) throws IOException, Exception { // compute hashes recursively and write to file - VerificationFileWriter out = new VerificationFileWriter(outputFile, hashType.getFormat(), "UTF-8"); + VerificationFileWriter out = new VerificationFileWriter(outputFile, hashType.getFormat(), outputEncoding != null ? outputEncoding.name() : "UTF-8"); try { CLILogger.fine("Computing hashes"); diff --git a/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java b/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java new file mode 100644 index 00000000..cc9113af --- /dev/null +++ b/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java @@ -0,0 +1,121 @@ + +package net.sourceforge.filebot.subtitle; + + +import static java.lang.Math.*; +import static net.sourceforge.tuned.FileUtilities.*; + +import java.io.File; +import java.io.IOException; +import java.io.StringReader; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +import net.sourceforge.filebot.vfs.MemoryFile; + + +public final class SubtitleUtilities { + + /** + * Detect charset and parse subtitle file even if extension is invalid + */ + public static List decodeSubtitles(MemoryFile file) throws IOException { + // gather all formats, put likely formats first + LinkedList likelyFormats = new LinkedList(); + + for (SubtitleFormat format : SubtitleFormat.values()) { + if (format.getFilter().accept(file.getName())) + likelyFormats.addFirst(format); + else + likelyFormats.addLast(format); + } + + // decode bytes + String textfile = getText(file.getData()); + + // decode subtitle file with the first reader that seems to work + for (SubtitleFormat format : likelyFormats) { + // reset reader to position 0 + SubtitleReader parser = format.newReader(new StringReader(textfile)); + + if (parser.hasNext()) { + // correct format found + List list = new ArrayList(500); + + // read subtitle file + while (parser.hasNext()) { + list.add(parser.next()); + } + + return list; + } + } + + // unsupported subtitle format + throw new IOException("Cannot read subtitle format"); + } + + + public static ByteBuffer exportSubtitles(MemoryFile data, SubtitleFormat outputFormat, long outputTimingOffset, Charset outputEncoding) throws IOException { + if (outputFormat != null && outputFormat != SubtitleFormat.SubRip) { + throw new IllegalArgumentException("Format not supported"); + } + + // convert to target format and target encoding + if (outputFormat == SubtitleFormat.SubRip) { + // output buffer + StringBuilder buffer = new StringBuilder(4 * 1024); + SubRipWriter out = new SubRipWriter(buffer); + + for (SubtitleElement it : decodeSubtitles(data)) { + if (outputTimingOffset != 0) + it = new SubtitleElement(max(0, it.getStart() + outputTimingOffset), max(0, it.getEnd() + outputTimingOffset), it.getText()); + + out.write(it); + } + + return outputEncoding.encode(CharBuffer.wrap(buffer)); + } + + // only change encoding + return outputEncoding.encode(getText(data.getData())); + } + + + public static SubtitleFormat getSubtitleFormat(File file) { + for (SubtitleFormat it : SubtitleFormat.values()) { + if (it.getFilter().accept(file)) + return it; + } + + return null; + } + + + public static SubtitleFormat getSubtitleFormatByName(String name) { + for (SubtitleFormat it : SubtitleFormat.values()) { + // check by name + if (it.name().equalsIgnoreCase(name)) + return it; + + // check by extension + if (it.getFilter().acceptExtension(name)) + return it; + } + + return null; + } + + + /** + * Dummy constructor to prevent instantiation. + */ + private SubtitleUtilities() { + throw new UnsupportedOperationException(); + } + +} diff --git a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleDownloadComponent.java b/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleDownloadComponent.java index 8e203006..6f89c516 100644 --- a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleDownloadComponent.java +++ b/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleDownloadComponent.java @@ -3,8 +3,8 @@ package net.sourceforge.filebot.ui.panel.subtitle; import static net.sourceforge.filebot.MediaTypes.*; +import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*; import static net.sourceforge.filebot.ui.NotificationLogging.*; -import static net.sourceforge.filebot.ui.panel.subtitle.SubtitleUtilities.*; import static net.sourceforge.tuned.FileUtilities.*; import static net.sourceforge.tuned.ui.TunedUtilities.*; @@ -327,12 +327,12 @@ class SubtitleDownloadComponent extends JComponent { SubtitleFileChooser sf = new SubtitleFileChooser(); // normalize name and auto-adjust extension - String ext = sf.getSelectedFormat().getFilter().extensions()[0]; + String ext = sf.getSelectedFormat().getFilter().extension(); String name = validateFileName(getNameWithoutExtension(file.getName())); sf.setSelectedFile(new File(name + "." + ext)); if (sf.showSaveDialog(getWindow(this)) == JFileChooser.APPROVE_OPTION) { - exportSubtitles(decodeSubtitles(file), sf.getSelectedFile(), sf.getSelectedEncoding(), sf.getSelectedFormat(), sf.getTimingOffset()); + writeFile(exportSubtitles(file, sf.getSelectedFormat(), sf.getTimingOffset(), sf.getSelectedEncoding()), sf.getSelectedFile()); } } else { // multiple files @@ -346,11 +346,11 @@ class SubtitleDownloadComponent extends JComponent { MemoryFile file = (MemoryFile) object; // normalize name and auto-adjust extension - String ext = sf.getSelectedFormat().getFilter().extensions()[0]; + String ext = sf.getSelectedFormat().getFilter().extension(); String name = validateFileName(getNameWithoutExtension(file.getName())); File destination = new File(folder, name + "." + ext); - exportSubtitles(decodeSubtitles(file), destination, sf.getSelectedEncoding(), sf.getSelectedFormat(), sf.getTimingOffset()); + writeFile(exportSubtitles(file, sf.getSelectedFormat(), sf.getTimingOffset(), sf.getSelectedEncoding()), destination); } } } diff --git a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java b/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java deleted file mode 100644 index 3edcd49b..00000000 --- a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java +++ /dev/null @@ -1,104 +0,0 @@ - -package net.sourceforge.filebot.ui.panel.subtitle; - - -import static java.lang.Math.*; -import static net.sourceforge.tuned.FileUtilities.*; - -import java.io.File; -import java.io.IOException; -import java.io.StringReader; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -import com.ibm.icu.text.CharsetDetector; - -import net.sourceforge.filebot.subtitle.SubRipWriter; -import net.sourceforge.filebot.subtitle.SubtitleElement; -import net.sourceforge.filebot.subtitle.SubtitleFormat; -import net.sourceforge.filebot.subtitle.SubtitleReader; -import net.sourceforge.filebot.vfs.MemoryFile; -import net.sourceforge.tuned.ByteBufferInputStream; - - -final class SubtitleUtilities { - - /** - * Detect charset and parse subtitle file even if extension is invalid - */ - public static List decodeSubtitles(MemoryFile file) throws IOException { - // detect charset and read text content - CharsetDetector detector = new CharsetDetector(); - detector.setDeclaredEncoding("UTF-8"); - detector.enableInputFilter(true); - - detector.setText(new ByteBufferInputStream(file.getData())); - String textfile = detector.detect().getString(); - - // gather all formats, put likely formats first - LinkedList priorityList = new LinkedList(); - - for (SubtitleFormat format : SubtitleFormat.values()) { - if (format.getFilter().accept(file.getName())) { - priorityList.addFirst(format); - } else { - priorityList.addLast(format); - } - } - - // decode subtitle file with the first reader that seems to work - for (SubtitleFormat format : priorityList) { - // reset reader to position 0 - SubtitleReader parser = format.newReader(new StringReader(textfile)); - - if (parser.hasNext()) { - // correct format found - List list = new ArrayList(500); - - // read subtitle file - while (parser.hasNext()) { - list.add(parser.next()); - } - - return list; - } - } - - // unsupported subtitle format - throw new IOException("Cannot read subtitle format"); - } - - - /** - * Write a subtitle file to disk - */ - public static void exportSubtitles(List data, File destination, Charset encoding, SubtitleFormat format, long timingOffset) throws IOException { - if (format != SubtitleFormat.SubRip) - throw new IllegalArgumentException("Format not supported"); - - StringBuilder buffer = new StringBuilder(4 * 1024); - SubRipWriter out = new SubRipWriter(buffer); - - for (SubtitleElement it : data) { - if (timingOffset != 0) - it = new SubtitleElement(max(0, it.getStart() + timingOffset), max(0, it.getEnd() + timingOffset), it.getText()); - - out.write(it); - } - - // write to file - writeFile(encoding.encode(CharBuffer.wrap(buffer)), destination); - } - - - /** - * Dummy constructor to prevent instantiation. - */ - private SubtitleUtilities() { - throw new UnsupportedOperationException(); - } - -} diff --git a/source/net/sourceforge/tuned/FileUtilities.java b/source/net/sourceforge/tuned/FileUtilities.java index 566b82ff..d1ac5def 100644 --- a/source/net/sourceforge/tuned/FileUtilities.java +++ b/source/net/sourceforge/tuned/FileUtilities.java @@ -13,6 +13,7 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -99,6 +100,20 @@ public final class FileUtilities { } + public static String getText(ByteBuffer data) throws IOException { + CharsetDetector detector = new CharsetDetector(); + detector.setDeclaredEncoding("UTF-8"); // small boost for UTF-8 as default encoding + detector.setText(new ByteBufferInputStream(data)); + + CharsetMatch charset = detector.detect(); + if (charset != null) + return charset.getString(); + + // assume UTF-8 by default + return Charset.forName("UTF-8").decode(data).toString(); + } + + /** * Pattern used for matching file extensions. * @@ -419,6 +434,11 @@ public final class FileUtilities { } + public String extension() { + return extensions[0]; + } + + public String[] extensions() { return extensions.clone(); }