* CLI: option for defining output encoding of downloaded subtitles

2025-03-09 22:09:47 -04:00 · 2011-09-14 18:13:34 +00:00 · 2011-09-14 18:13:34 +00:00 · 42940397de
commit 42940397de
parent 3c3bb0c744
6 changed files with 209 additions and 140 deletions
--- a/source/net/sourceforge/filebot/cli/ArgumentBean.java
+++ b/source/net/sourceforge/filebot/cli/ArgumentBean.java
@ -7,6 +7,7 @@ import static net.sourceforge.tuned.FileUtilities.*;

 import java.io.File;
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.logging.Level;
@ -26,11 +27,11 @@ public class ArgumentBean {
 	@Option(name = "-rename", usage = "Rename episode/movie files", metaVar = "fileset")
 	public boolean rename = false;
 	
-	@Option(name = "--db", usage = "Episode/Movie database", metaVar = "[TVRage, AniDB, TheTVDB] or [OpenSubtitles, TheMovieDB]")
-	public String db = null;
+	@Option(name = "--db", usage = "Episode/Movie database", metaVar = "TVRage, AniDB, TheTVDB, OpenSubtitles, TheMovieDB")
+	public String db;
 	
 	@Option(name = "--format", usage = "Episode naming scheme", metaVar = "expression")
-	public String format = "{n} - {s+'x'}{e.pad(2)} - {t}";
+	public String format;
 	
 	@Option(name = "-non-strict", usage = "Use less strict matching")
 	public boolean nonStrict = false;
@ -39,7 +40,7 @@ public class ArgumentBean {
 	public boolean getSubtitles;
 	
 	@Option(name = "--q", usage = "Search query", metaVar = "title")
-	public String query = null;
+	public String query;
 	
 	@Option(name = "--lang", usage = "Language", metaVar = "2-letter language code")
 	public String lang = "en";
@ -47,10 +48,13 @@ public class ArgumentBean {
 	@Option(name = "-check", usage = "Create/Check verification file", metaVar = "fileset")
 	public boolean check;
 	
-	@Option(name = "--output", usage = "Output options", metaVar = "[sfv, md5, sha1]")
-	public String output = "sfv";
+	@Option(name = "--output", usage = "Output options", metaVar = "sfv, md5, sha1")
+	public String output;
 	
-	@Option(name = "--log", usage = "Log level", metaVar = "[all, config, info, warning]")
+	@Option(name = "--encoding", usage = "Character encoding", metaVar = "UTF-8, windows-1252, GB18030")
+	public String encoding;
+	
+	@Option(name = "--log", usage = "Log level", metaVar = "all, config, info, warning")
 	public String log = "all";
 	
 	@Option(name = "-help", usage = "Print this help message")
@ -87,7 +91,7 @@ public class ArgumentBean {
 	

 	public ExpressionFormat getEpisodeFormat() throws ScriptException {
-		return new ExpressionFormat(format);
+		return format != null ? new ExpressionFormat(format) : null; // null when no --format expression was given (the option no longer has a built-in default)
 	}
 	

@ -101,6 +105,11 @@ public class ArgumentBean {
 	}
 	

+	public Charset getEncoding() { // charset selected via --encoding
+		return (encoding == null) ? null : Charset.forName(encoding); // null = option not given; forName throws for illegal or unsupported charset names
+	}
+	
+
 	public Level getLogLevel() {
 		return Level.parse(log.toUpperCase());
 	}
--- a/source/net/sourceforge/filebot/cli/ArgumentProcessor.java
+++ b/source/net/sourceforge/filebot/cli/ArgumentProcessor.java
@ -7,11 +7,13 @@ import static net.sourceforge.filebot.MediaTypes.*;
 import static net.sourceforge.filebot.WebServices.*;
 import static net.sourceforge.filebot.cli.CLILogging.*;
 import static net.sourceforge.filebot.hash.VerificationUtilities.*;
+import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
 import static net.sourceforge.tuned.FileUtilities.*;

 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.LinkedHashMap;
@ -37,12 +39,14 @@ import net.sourceforge.filebot.similarity.Matcher;
 import net.sourceforge.filebot.similarity.NameSimilarityMetric;
 import net.sourceforge.filebot.similarity.SeriesNameMatcher;
 import net.sourceforge.filebot.similarity.SimilarityMetric;
+import net.sourceforge.filebot.subtitle.SubtitleFormat;
 import net.sourceforge.filebot.ui.Language;
 import net.sourceforge.filebot.ui.panel.rename.HistorySpooler;
 import net.sourceforge.filebot.ui.panel.rename.MatchSimilarityMetric;
 import net.sourceforge.filebot.vfs.ArchiveType;
 import net.sourceforge.filebot.vfs.MemoryFile;
 import net.sourceforge.filebot.web.Episode;
+import net.sourceforge.filebot.web.EpisodeFormat;
 import net.sourceforge.filebot.web.EpisodeListProvider;
 import net.sourceforge.filebot.web.MovieDescriptor;
 import net.sourceforge.filebot.web.MovieIdentificationService;
@ -60,7 +64,7 @@ public class ArgumentProcessor {
 			Set<File> files = new LinkedHashSet<File>(args.getFiles(true));
 			
 			if (args.getSubtitles) {
-				List<File> subtitles = getSubtitles(files, args.query, args.getLanguage());
+				List<File> subtitles = getSubtitles(files, args.query, args.getLanguage(), args.output, args.getEncoding());
 				files.addAll(subtitles);
 			}
 			
@ -69,7 +73,7 @@ public class ArgumentProcessor {
 			}
 			
 			if (args.check) {
-				check(files, args.output);
+				check(files, args.output, args.getEncoding());
 			}
 			
 			CLILogger.finest("Done ヾ(＠⌒ー⌒＠)ノ");
@ -168,7 +172,8 @@ public class ArgumentProcessor {
 		
 		for (Match<File, Episode> match : matches) {
 			File file = match.getValue();
-			String newName = format.format(new EpisodeBindingBean(match.getCandidate(), file));
+			Episode episode = match.getCandidate();
+			String newName = (format != null) ? format.format(new EpisodeBindingBean(episode, file)) : EpisodeFormat.SeasonEpisode.format(episode);
 			
 			if (isInvalidFileName(newName)) {
 				CLILogger.config("Stripping invalid characters from new name: " + newName);
@ -234,7 +239,7 @@ public class ArgumentProcessor {
 	}
 	

-	public List<File> getSubtitles(Collection<File> files, String query, Language language) throws Exception {
+	public List<File> getSubtitles(Collection<File> files, String query, Language language, String output, Charset outputEncoding) throws Exception {
 		// match movie hashes online
 		Set<File> videos = new TreeSet<File>(filter(files, VIDEO_FILES));
 		List<File> downloadedSubtitles = new ArrayList<File>();
@ -243,6 +248,17 @@ public class ArgumentProcessor {
 			throw new IllegalArgumentException("No video files: " + files);
 		}
 		
+		SubtitleFormat outputFormat = null;
+		if (output != null) {
+			outputFormat = getSubtitleFormatByName(output);
+			
+			// when rewriting subtitles to target format an encoding must be defined, default to UTF-8
+			if (outputEncoding == null)
+				outputEncoding = Charset.forName("UTF-8");
+			
+			CLILogger.config(format("Export as: %s (%s)", outputFormat, outputEncoding.displayName(Locale.ROOT)));
+		}
+		
 		// lookup subtitles by hash
 		for (VideoHashSubtitleService service : WebServices.getVideoHashSubtitleServices()) {
 			if (videos.isEmpty())
@ -253,7 +269,7 @@ public class ArgumentProcessor {
 			for (Entry<File, List<SubtitleDescriptor>> it : service.getSubtitleList(videos.toArray(new File[0]), language.getName()).entrySet()) {
 				if (it.getValue() != null && it.getValue().size() > 0) {
 					// auto-select first element if there are multiple hash matches for the same video files
-					File subtitle = fetchSubtitle(it.getValue().get(0), it.getKey());
+					File subtitle = fetchSubtitle(it.getValue().get(0), it.getKey(), outputFormat, outputEncoding);
 					
 					// download complete, cross this video off the list
 					videos.remove(it.getKey());
@ -277,7 +293,7 @@ public class ArgumentProcessor {
 						
 						for (SubtitleDescriptor descriptor : subtitles) {
 							if (filename.equalsIgnoreCase(descriptor.getName())) {
-								File subtitle = fetchSubtitle(descriptor, video);
+								File subtitle = fetchSubtitle(descriptor, video, outputFormat, outputEncoding);
 								
 								// download complete, cross this video off the list
 								videos.remove(video);
@ -300,30 +316,37 @@ public class ArgumentProcessor {
 	}
 	

-	private File fetchSubtitle(SubtitleDescriptor descriptor, File movieFile) throws Exception {
+	private File fetchSubtitle(SubtitleDescriptor descriptor, File movieFile, SubtitleFormat outputFormat, Charset outputEncoding) throws Exception { // outputFormat == null keeps the downloaded format; both null writes the download unchanged
 		// fetch subtitle archive
 		CLILogger.info(format("Fetching [%s.%s]", descriptor.getName(), descriptor.getType()));
 		ByteBuffer downloadedData = descriptor.fetch();
 		
 		// extract subtitles from archive
 		ArchiveType type = ArchiveType.forName(descriptor.getType());
-		MemoryFile subtitleData;
+		MemoryFile subtitleFile;
 		
 		if (type != ArchiveType.UNDEFINED) {
 			// extract subtitle from archive
-			subtitleData = type.fromData(downloadedData).iterator().next();
+			subtitleFile = type.fromData(downloadedData).iterator().next();
 		} else {
 			// assume that the fetched data is the subtitle
-			subtitleData = new MemoryFile(descriptor.getName() + "." + descriptor.getType(), downloadedData);
+			subtitleFile = new MemoryFile(descriptor.getName() + "." + descriptor.getType(), downloadedData);
 		}
 		
 		// subtitle filename is based on movie filename
-		String subtitleFileName = getNameWithoutExtension(movieFile.getName()) + "." + getExtension(subtitleData.getName());
-		File destination = new File(movieFile.getParentFile(), validateFileName(subtitleFileName));
+		String name = getName(movieFile);
+		String ext = getExtension(subtitleFile.getName());
+		ByteBuffer data = subtitleFile.getData();
 		
-		CLILogger.config(format("Writing [%s] to [%s]", subtitleData.getName(), destination.getName()));
-		writeFile(subtitleData.getData(), destination);
+		if (outputFormat != null)
+			ext = outputFormat.getFilter().extension(); // adjust extension of the output file, but only when converting to a target format (outputFormat may be null here)
+		if (outputFormat != null || outputEncoding != null)
+			data = exportSubtitles(subtitleFile, outputFormat, 0, outputEncoding); // null format = re-encode only; getSubtitles supplies a non-null encoding whenever --output is set
 		
+		File destination = new File(movieFile.getParentFile(), name + "." + ext);
+		CLILogger.config(format("Writing [%s] to [%s]", subtitleFile.getName(), destination.getName()));
+		
+		writeFile(data, destination);
 		return destination;
 	}
 	
@ -435,7 +458,7 @@ public class ArgumentProcessor {
 	}
 	

-	public void check(Collection<File> files, String output) throws Exception {
+	public void check(Collection<File> files, String output, Charset outputEncoding) throws Exception {
 		// check verification file
 		if (containsOnly(files, MediaTypes.getDefaultFilter("verification"))) {
 			// only check existing hashes
@ -474,19 +497,19 @@ public class ArgumentProcessor {
 		} else {
 			// auto-select the filename based on folder and type
 			hashType = (output != null) ? getHashTypeByExtension(output) : HashType.SFV;
-			outputFile = new File(root, root.getName() + "." + hashType.getFilter().extensions()[0]);
+			outputFile = new File(root, root.getName() + "." + hashType.getFilter().extension());
 		}
 		
-		CLILogger.config("Using output file: " + outputFile);
 		if (hashType == null) {
 			throw new IllegalArgumentException("Illegal output type: " + output);
 		}
 		
-		compute(root.getPath(), files, outputFile, hashType);
+		CLILogger.config("Using output file: " + outputFile);
+		compute(root.getPath(), files, outputFile, hashType, outputEncoding);
 	}
 	

-	public boolean check(File verificationFile, File parent) throws Exception {
+	private boolean check(File verificationFile, File root) throws Exception {
 		HashType type = getHashType(verificationFile);
 		
 		// check if type is supported
@ -503,8 +526,8 @@ public class ArgumentProcessor {
 				try {
 					Entry<File, String> it = parser.next();
 					
-					File file = new File(parent, it.getKey().getPath()).getAbsoluteFile();
-					String current = computeHash(new File(parent, it.getKey().getPath()), type);
+					File file = new File(root, it.getKey().getPath()).getAbsoluteFile();
+					String current = computeHash(new File(root, it.getKey().getPath()), type);
 					CLILogger.info(format("%s %s", current, file));
 					
 					if (current.compareToIgnoreCase(it.getValue()) != 0) {
@ -523,9 +546,9 @@ public class ArgumentProcessor {
 	}
 	

-	private void compute(String root, Collection<File> files, File outputFile, HashType hashType) throws IOException, Exception {
+	private void compute(String root, Collection<File> files, File outputFile, HashType hashType, Charset outputEncoding) throws IOException, Exception {
 		// compute hashes recursively and write to file
-		VerificationFileWriter out = new VerificationFileWriter(outputFile, hashType.getFormat(), "UTF-8");
+		VerificationFileWriter out = new VerificationFileWriter(outputFile, hashType.getFormat(), outputEncoding != null ? outputEncoding.name() : "UTF-8");
 		
 		try {
 			CLILogger.fine("Computing hashes");
--- a/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java
+++ b/source/net/sourceforge/filebot/subtitle/SubtitleUtilities.java
@ -0,0 +1,121 @@
+
+package net.sourceforge.filebot.subtitle;
+
+
+import static java.lang.Math.*;
+import static net.sourceforge.tuned.FileUtilities.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringReader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import net.sourceforge.filebot.vfs.MemoryFile;
+
+
+public final class SubtitleUtilities {
+	
+	/**
+	 * Detect charset and parse subtitle file even if extension is invalid; returns the elements of the first format whose reader yields any, throws IOException if no format can read the text
+	 */
+	public static List<SubtitleElement> decodeSubtitles(MemoryFile file) throws IOException {
+		// gather all formats, put likely formats first
+		LinkedList<SubtitleFormat> likelyFormats = new LinkedList<SubtitleFormat>();
+		
+		for (SubtitleFormat format : SubtitleFormat.values()) {
+			if (format.getFilter().accept(file.getName()))
+				likelyFormats.addFirst(format); // file name matches this format, so try it before the others
+			else
+				likelyFormats.addLast(format);
+		}
+		
+		// decode bytes (charset is detected by getText)
+		String textfile = getText(file.getData());
+		
+		// decode subtitle file with the first reader that seems to work
+		for (SubtitleFormat format : likelyFormats) {
+			// every attempt gets a fresh reader over the decoded text, i.e. starts at position 0
+			SubtitleReader parser = format.newReader(new StringReader(textfile));
+			
+			if (parser.hasNext()) {
+				// correct format found
+				List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);
+				
+				// read subtitle file
+				while (parser.hasNext()) {
+					list.add(parser.next());
+				}
+				
+				return list;
+			}
+		}
+		
+		// unsupported subtitle format
+		throw new IOException("Cannot read subtitle format");
+	}
+	
+	/** Encode subtitles with outputEncoding (must not be null); converts to SubRip first if requested, a null outputFormat only changes the encoding */
+	public static ByteBuffer exportSubtitles(MemoryFile data, SubtitleFormat outputFormat, long outputTimingOffset, Charset outputEncoding) throws IOException {
+		if (outputFormat != null && outputFormat != SubtitleFormat.SubRip) {
+			throw new IllegalArgumentException("Format not supported"); // SubRip is the only format that can be written
+		}
+		
+		// convert to target format and target encoding
+		if (outputFormat == SubtitleFormat.SubRip) {
+			// output buffer
+			StringBuilder buffer = new StringBuilder(4 * 1024);
+			SubRipWriter out = new SubRipWriter(buffer);
+			
+			for (SubtitleElement it : decodeSubtitles(data)) {
+				if (outputTimingOffset != 0)
+					it = new SubtitleElement(max(0, it.getStart() + outputTimingOffset), max(0, it.getEnd() + outputTimingOffset), it.getText()); // shift timing, clamped so it never becomes negative
+				
+				out.write(it);
+			}
+			
+			return outputEncoding.encode(CharBuffer.wrap(buffer));
+		}
+		
+		// only change encoding (text is passed through without parsing, timing offset is not applied)
+		return outputEncoding.encode(getText(data.getData()));
+	}
+	
+	/** Find the first format whose file filter accepts the given file, or null if none does */
+	public static SubtitleFormat getSubtitleFormat(File file) {
+		for (SubtitleFormat it : SubtitleFormat.values()) {
+			if (it.getFilter().accept(file))
+				return it;
+		}
+		
+		return null;
+	}
+	
+	/** Find a format by enum constant name (ignoring case) or by an extension its filter accepts, or null if nothing matches */
+	public static SubtitleFormat getSubtitleFormatByName(String name) {
+		for (SubtitleFormat it : SubtitleFormat.values()) {
+			// check by name
+			if (it.name().equalsIgnoreCase(name))
+				return it;
+			
+			// check by extension
+			if (it.getFilter().acceptExtension(name))
+				return it;
+		}
+		
+		return null;
+	}
+	
+
+	/**
+	 * Dummy constructor to prevent instantiation.
+	 */
+	private SubtitleUtilities() {
+		throw new UnsupportedOperationException();
+	}
+	
+}
--- a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleDownloadComponent.java
+++ b/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleDownloadComponent.java
@ -3,8 +3,8 @@ package net.sourceforge.filebot.ui.panel.subtitle;


 import static net.sourceforge.filebot.MediaTypes.*;
+import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
 import static net.sourceforge.filebot.ui.NotificationLogging.*;
-import static net.sourceforge.filebot.ui.panel.subtitle.SubtitleUtilities.*;
 import static net.sourceforge.tuned.FileUtilities.*;
 import static net.sourceforge.tuned.ui.TunedUtilities.*;

@ -327,12 +327,12 @@ class SubtitleDownloadComponent extends JComponent {
 				SubtitleFileChooser sf = new SubtitleFileChooser();
 				
 				// normalize name and auto-adjust extension
-				String ext = sf.getSelectedFormat().getFilter().extensions()[0];
+				String ext = sf.getSelectedFormat().getFilter().extension();
 				String name = validateFileName(getNameWithoutExtension(file.getName()));
 				sf.setSelectedFile(new File(name + "." + ext));
 				
 				if (sf.showSaveDialog(getWindow(this)) == JFileChooser.APPROVE_OPTION) {
-					exportSubtitles(decodeSubtitles(file), sf.getSelectedFile(), sf.getSelectedEncoding(), sf.getSelectedFormat(), sf.getTimingOffset());
+					writeFile(exportSubtitles(file, sf.getSelectedFormat(), sf.getTimingOffset(), sf.getSelectedEncoding()), sf.getSelectedFile());
 				}
 			} else {
 				// multiple files
@ -346,11 +346,11 @@ class SubtitleDownloadComponent extends JComponent {
 						MemoryFile file = (MemoryFile) object;
 						
 						// normalize name and auto-adjust extension
-						String ext = sf.getSelectedFormat().getFilter().extensions()[0];
+						String ext = sf.getSelectedFormat().getFilter().extension();
 						String name = validateFileName(getNameWithoutExtension(file.getName()));
 						File destination = new File(folder, name + "." + ext);
 						
-						exportSubtitles(decodeSubtitles(file), destination, sf.getSelectedEncoding(), sf.getSelectedFormat(), sf.getTimingOffset());
+						writeFile(exportSubtitles(file, sf.getSelectedFormat(), sf.getTimingOffset(), sf.getSelectedEncoding()), destination);
 					}
 				}
 			}
--- a/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java
+++ b/source/net/sourceforge/filebot/ui/panel/subtitle/SubtitleUtilities.java
@ -1,104 +0,0 @@
-
-package net.sourceforge.filebot.ui.panel.subtitle;
-
-
-import static java.lang.Math.*;
-import static net.sourceforge.tuned.FileUtilities.*;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.StringReader;
-import java.nio.CharBuffer;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-
-import com.ibm.icu.text.CharsetDetector;
-
-import net.sourceforge.filebot.subtitle.SubRipWriter;
-import net.sourceforge.filebot.subtitle.SubtitleElement;
-import net.sourceforge.filebot.subtitle.SubtitleFormat;
-import net.sourceforge.filebot.subtitle.SubtitleReader;
-import net.sourceforge.filebot.vfs.MemoryFile;
-import net.sourceforge.tuned.ByteBufferInputStream;
-
-
-final class SubtitleUtilities {
-	
-	/**
-	 * Detect charset and parse subtitle file even if extension is invalid
-	 */
-	public static List<SubtitleElement> decodeSubtitles(MemoryFile file) throws IOException {
-		// detect charset and read text content 
-		CharsetDetector detector = new CharsetDetector();
-		detector.setDeclaredEncoding("UTF-8");
-		detector.enableInputFilter(true);
-		
-		detector.setText(new ByteBufferInputStream(file.getData()));
-		String textfile = detector.detect().getString();
-		
-		// gather all formats, put likely formats first
-		LinkedList<SubtitleFormat> priorityList = new LinkedList<SubtitleFormat>();
-		
-		for (SubtitleFormat format : SubtitleFormat.values()) {
-			if (format.getFilter().accept(file.getName())) {
-				priorityList.addFirst(format);
-			} else {
-				priorityList.addLast(format);
-			}
-		}
-		
-		// decode subtitle file with the first reader that seems to work
-		for (SubtitleFormat format : priorityList) {
-			// reset reader to position 0
-			SubtitleReader parser = format.newReader(new StringReader(textfile));
-			
-			if (parser.hasNext()) {
-				// correct format found
-				List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);
-				
-				// read subtitle file
-				while (parser.hasNext()) {
-					list.add(parser.next());
-				}
-				
-				return list;
-			}
-		}
-		
-		// unsupported subtitle format
-		throw new IOException("Cannot read subtitle format");
-	}
-	
-
-	/**
-	 * Write a subtitle file to disk
-	 */
-	public static void exportSubtitles(List<SubtitleElement> data, File destination, Charset encoding, SubtitleFormat format, long timingOffset) throws IOException {
-		if (format != SubtitleFormat.SubRip)
-			throw new IllegalArgumentException("Format not supported");
-		
-		StringBuilder buffer = new StringBuilder(4 * 1024);
-		SubRipWriter out = new SubRipWriter(buffer);
-		
-		for (SubtitleElement it : data) {
-			if (timingOffset != 0)
-				it = new SubtitleElement(max(0, it.getStart() + timingOffset), max(0, it.getEnd() + timingOffset), it.getText());
-			
-			out.write(it);
-		}
-		
-		// write to file
-		writeFile(encoding.encode(CharBuffer.wrap(buffer)), destination);
-	}
-	
-
-	/**
-	 * Dummy constructor to prevent instantiation.
-	 */
-	private SubtitleUtilities() {
-		throw new UnsupportedOperationException();
-	}
-	
-}
--- a/source/net/sourceforge/tuned/FileUtilities.java
+++ b/source/net/sourceforge/tuned/FileUtilities.java
@ -13,6 +13,7 @@ import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@ -99,6 +100,20 @@ public final class FileUtilities {
 	}
 	

+	public static String getText(ByteBuffer data) throws IOException { // decode bytes to text: detected charset if any, UTF-8 otherwise
+		CharsetDetector detector = new CharsetDetector();
+		detector.setDeclaredEncoding("UTF-8"); // small boost for UTF-8 as default encoding
+		detector.setText(new ByteBufferInputStream(data.duplicate())); // read through a duplicate so the caller's buffer position and limit stay untouched
+		
+		CharsetMatch charset = detector.detect();
+		if (charset != null)
+			return charset.getString();
+		
+		// no charset detected, assume UTF-8 and decode the complete content (again via a duplicate, independent of what the detector has read)
+		return Charset.forName("UTF-8").decode(data.duplicate()).toString();
+	}
+	
+
 	/**
 	 * Pattern used for matching file extensions.
 	 * 
@ -419,6 +434,11 @@ public final class FileUtilities {
 		}
 		

+		public String extension() { // single-extension shortcut for extensions()[0]
+			return extensions[0]; // first entry of the extensions array, no defensive copy of the array needed
+		}
+		
+
 		public String[] extensions() {
 			return extensions.clone();
 		}