1
0
mirror of https://github.com/mitb-archive/filebot synced 2024-12-24 08:48:51 -05:00

* CLI: option for defining output encoding of downloaded subtitles

This commit is contained in:
Reinhard Pointner 2011-09-14 18:13:34 +00:00
parent 3c3bb0c744
commit 42940397de
6 changed files with 209 additions and 140 deletions

View File

@ -7,6 +7,7 @@ import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
@ -26,11 +27,11 @@ public class ArgumentBean {
@Option(name = "-rename", usage = "Rename episode/movie files", metaVar = "fileset")
public boolean rename = false;
@Option(name = "--db", usage = "Episode/Movie database", metaVar = "[TVRage, AniDB, TheTVDB] or [OpenSubtitles, TheMovieDB]")
public String db = null;
@Option(name = "--db", usage = "Episode/Movie database", metaVar = "TVRage, AniDB, TheTVDB, OpenSubtitles, TheMovieDB")
public String db;
@Option(name = "--format", usage = "Episode naming scheme", metaVar = "expression")
public String format = "{n} - {s+'x'}{e.pad(2)} - {t}";
public String format;
@Option(name = "-non-strict", usage = "Use less strict matching")
public boolean nonStrict = false;
@ -39,7 +40,7 @@ public class ArgumentBean {
public boolean getSubtitles;
@Option(name = "--q", usage = "Search query", metaVar = "title")
public String query = null;
public String query;
@Option(name = "--lang", usage = "Language", metaVar = "2-letter language code")
public String lang = "en";
@ -47,10 +48,13 @@ public class ArgumentBean {
@Option(name = "-check", usage = "Create/Check verification file", metaVar = "fileset")
public boolean check;
@Option(name = "--output", usage = "Output options", metaVar = "[sfv, md5, sha1]")
public String output = "sfv";
@Option(name = "--output", usage = "Output options", metaVar = "sfv, md5, sha1")
public String output;
@Option(name = "--log", usage = "Log level", metaVar = "[all, config, info, warning]")
@Option(name = "--encoding", usage = "Character encoding", metaVar = "UTF-8, windows-1252, GB18030")
public String encoding;
@Option(name = "--log", usage = "Log level", metaVar = "all, config, info, warning")
public String log = "all";
@Option(name = "-help", usage = "Print this help message")
@ -87,7 +91,7 @@ public class ArgumentBean {
/**
 * Build the episode naming format from the <code>format</code> field (the <code>--format</code> option).
 *
 * NOTE(review): this hunk lists two return statements. Presumably the first is the line removed by this
 * commit and the second is its replacement, which yields null when no format expression was given --
 * confirm against the diff markers of the original commit.
 *
 * @throws ScriptException declared by this method; presumably raised when the expression cannot be compiled -- TODO confirm
 */
public ExpressionFormat getEpisodeFormat() throws ScriptException {
return new ExpressionFormat(format);
return format != null ? new ExpressionFormat(format) : null;
}
@ -101,6 +105,11 @@ public class ArgumentBean {
}
/**
 * Resolve the character set named by the <code>--encoding</code> option.
 *
 * @return the charset for the <code>encoding</code> field, or <code>null</code> if no encoding was given
 */
public Charset getEncoding() {
    if (encoding == null) {
        return null;
    }

    return Charset.forName(encoding);
}
/**
 * Resolve the log level named by the <code>--log</code> option.
 *
 * @return the level whose name matches the upper-cased <code>log</code> field
 * @throws IllegalArgumentException if the value is not a valid level name (thrown by <code>Level.parse</code>)
 */
public Level getLogLevel() {
    // upper-case with a fixed locale: with the default locale, a Turkish environment maps "info" to
    // "\u0130NFO" (dotted capital I), which Level.parse does not recognize
    return Level.parse(log.toUpperCase(java.util.Locale.ROOT));
}

View File

@ -7,11 +7,13 @@ import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.WebServices.*;
import static net.sourceforge.filebot.cli.CLILogging.*;
import static net.sourceforge.filebot.hash.VerificationUtilities.*;
import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
@ -37,12 +39,14 @@ import net.sourceforge.filebot.similarity.Matcher;
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.SeriesNameMatcher;
import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.subtitle.SubtitleFormat;
import net.sourceforge.filebot.ui.Language;
import net.sourceforge.filebot.ui.panel.rename.HistorySpooler;
import net.sourceforge.filebot.ui.panel.rename.MatchSimilarityMetric;
import net.sourceforge.filebot.vfs.ArchiveType;
import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.EpisodeFormat;
import net.sourceforge.filebot.web.EpisodeListProvider;
import net.sourceforge.filebot.web.MovieDescriptor;
import net.sourceforge.filebot.web.MovieIdentificationService;
@ -60,7 +64,7 @@ public class ArgumentProcessor {
Set<File> files = new LinkedHashSet<File>(args.getFiles(true));
if (args.getSubtitles) {
List<File> subtitles = getSubtitles(files, args.query, args.getLanguage());
List<File> subtitles = getSubtitles(files, args.query, args.getLanguage(), args.output, args.getEncoding());
files.addAll(subtitles);
}
@ -69,7 +73,7 @@ public class ArgumentProcessor {
}
if (args.check) {
check(files, args.output);
check(files, args.output, args.getEncoding());
}
CLILogger.finest("Done ヾ(@⌒ー⌒@)");
@ -168,7 +172,8 @@ public class ArgumentProcessor {
for (Match<File, Episode> match : matches) {
File file = match.getValue();
String newName = format.format(new EpisodeBindingBean(match.getCandidate(), file));
Episode episode = match.getCandidate();
String newName = (format != null) ? format.format(new EpisodeBindingBean(episode, file)) : EpisodeFormat.SeasonEpisode.format(episode);
if (isInvalidFileName(newName)) {
CLILogger.config("Stripping invalid characters from new name: " + newName);
@ -234,7 +239,7 @@ public class ArgumentProcessor {
}
public List<File> getSubtitles(Collection<File> files, String query, Language language) throws Exception {
public List<File> getSubtitles(Collection<File> files, String query, Language language, String output, Charset outputEncoding) throws Exception {
// match movie hashes online
Set<File> videos = new TreeSet<File>(filter(files, VIDEO_FILES));
List<File> downloadedSubtitles = new ArrayList<File>();
@ -243,6 +248,17 @@ public class ArgumentProcessor {
throw new IllegalArgumentException("No video files: " + files);
}
SubtitleFormat outputFormat = null;
if (output != null) {
outputFormat = getSubtitleFormatByName(output);
// when rewriting subtitles to target format an encoding must be defined, default to UTF-8
if (outputEncoding == null)
outputEncoding = Charset.forName("UTF-8");
CLILogger.config(format("Export as: %s (%s)", outputFormat, outputEncoding.displayName(Locale.ROOT)));
}
// lookup subtitles by hash
for (VideoHashSubtitleService service : WebServices.getVideoHashSubtitleServices()) {
if (videos.isEmpty())
@ -253,7 +269,7 @@ public class ArgumentProcessor {
for (Entry<File, List<SubtitleDescriptor>> it : service.getSubtitleList(videos.toArray(new File[0]), language.getName()).entrySet()) {
if (it.getValue() != null && it.getValue().size() > 0) {
// auto-select first element if there are multiple hash matches for the same video files
File subtitle = fetchSubtitle(it.getValue().get(0), it.getKey());
File subtitle = fetchSubtitle(it.getValue().get(0), it.getKey(), outputFormat, outputEncoding);
// download complete, cross this video off the list
videos.remove(it.getKey());
@ -277,7 +293,7 @@ public class ArgumentProcessor {
for (SubtitleDescriptor descriptor : subtitles) {
if (filename.equalsIgnoreCase(descriptor.getName())) {
File subtitle = fetchSubtitle(descriptor, video);
File subtitle = fetchSubtitle(descriptor, video, outputFormat, outputEncoding);
// download complete, cross this video off the list
videos.remove(video);
@ -300,30 +316,37 @@ public class ArgumentProcessor {
}
/**
 * Fetch a single subtitle, unpack it if it arrives as an archive, optionally convert it to the requested
 * format / encoding, and write it into the folder of the given movie file.
 *
 * NOTE(review): this hunk interleaves lines from before and after the commit (two signatures, the
 * subtitleData / subtitleFile pairs, two destination declarations). Presumably the variant taking
 * outputFormat and outputEncoding is the current one -- confirm against the diff markers.
 *
 * @return the file the subtitle was written to
 */
private File fetchSubtitle(SubtitleDescriptor descriptor, File movieFile) throws Exception {
private File fetchSubtitle(SubtitleDescriptor descriptor, File movieFile, SubtitleFormat outputFormat, Charset outputEncoding) throws Exception {
// fetch subtitle archive
CLILogger.info(format("Fetching [%s.%s]", descriptor.getName(), descriptor.getType()));
ByteBuffer downloadedData = descriptor.fetch();
// extract subtitles from archive
ArchiveType type = ArchiveType.forName(descriptor.getType());
MemoryFile subtitleData;
MemoryFile subtitleFile;
if (type != ArchiveType.UNDEFINED) {
// extract subtitle from archive
// only the first entry produced by the archive iterator is used
subtitleData = type.fromData(downloadedData).iterator().next();
subtitleFile = type.fromData(downloadedData).iterator().next();
} else {
// assume that the fetched data is the subtitle
subtitleData = new MemoryFile(descriptor.getName() + "." + descriptor.getType(), downloadedData);
subtitleFile = new MemoryFile(descriptor.getName() + "." + descriptor.getType(), downloadedData);
}
// subtitle filename is based on movie filename
String subtitleFileName = getNameWithoutExtension(movieFile.getName()) + "." + getExtension(subtitleData.getName());
File destination = new File(movieFile.getParentFile(), validateFileName(subtitleFileName));
// NOTE(review): getName(File) presumably yields the movie file name without its extension -- TODO confirm
String name = getName(movieFile);
String ext = getExtension(subtitleFile.getName());
ByteBuffer data = subtitleFile.getData();
CLILogger.config(format("Writing [%s] to [%s]", subtitleData.getName(), destination.getName()));
writeFile(subtitleData.getData(), destination);
// NOTE(review): this condition also holds when only outputEncoding is set (outputFormat == null); in that case
// outputFormat.getFilter() on the next line throws a NullPointerException. The extension adjustment
// should be guarded by outputFormat != null while the export call stays under the combined condition.
if (outputFormat != null || outputEncoding != null) {
ext = outputFormat.getFilter().extension(); // adjust extension of the output file
data = exportSubtitles(subtitleFile, outputFormat, 0, outputEncoding);
}
// NOTE(review): unlike the destination built via validateFileName(...) above, this name is not passed through validateFileName
File destination = new File(movieFile.getParentFile(), name + "." + ext);
CLILogger.config(format("Writing [%s] to [%s]", subtitleFile.getName(), destination.getName()));
writeFile(data, destination);
return destination;
}
@ -435,7 +458,7 @@ public class ArgumentProcessor {
}
public void check(Collection<File> files, String output) throws Exception {
public void check(Collection<File> files, String output, Charset outputEncoding) throws Exception {
// check verification file
if (containsOnly(files, MediaTypes.getDefaultFilter("verification"))) {
// only check existing hashes
@ -474,19 +497,19 @@ public class ArgumentProcessor {
} else {
// auto-select the filename based on folder and type
hashType = (output != null) ? getHashTypeByExtension(output) : HashType.SFV;
outputFile = new File(root, root.getName() + "." + hashType.getFilter().extensions()[0]);
outputFile = new File(root, root.getName() + "." + hashType.getFilter().extension());
}
CLILogger.config("Using output file: " + outputFile);
if (hashType == null) {
throw new IllegalArgumentException("Illegal output type: " + output);
}
compute(root.getPath(), files, outputFile, hashType);
CLILogger.config("Using output file: " + outputFile);
compute(root.getPath(), files, outputFile, hashType, outputEncoding);
}
public boolean check(File verificationFile, File parent) throws Exception {
private boolean check(File verificationFile, File root) throws Exception {
HashType type = getHashType(verificationFile);
// check if type is supported
@ -503,8 +526,8 @@ public class ArgumentProcessor {
try {
Entry<File, String> it = parser.next();
File file = new File(parent, it.getKey().getPath()).getAbsoluteFile();
String current = computeHash(new File(parent, it.getKey().getPath()), type);
File file = new File(root, it.getKey().getPath()).getAbsoluteFile();
String current = computeHash(new File(root, it.getKey().getPath()), type);
CLILogger.info(format("%s %s", current, file));
if (current.compareToIgnoreCase(it.getValue()) != 0) {
@ -523,9 +546,9 @@ public class ArgumentProcessor {
}
private void compute(String root, Collection<File> files, File outputFile, HashType hashType) throws IOException, Exception {
private void compute(String root, Collection<File> files, File outputFile, HashType hashType, Charset outputEncoding) throws IOException, Exception {
// compute hashes recursively and write to file
VerificationFileWriter out = new VerificationFileWriter(outputFile, hashType.getFormat(), "UTF-8");
VerificationFileWriter out = new VerificationFileWriter(outputFile, hashType.getFormat(), outputEncoding != null ? outputEncoding.name() : "UTF-8");
try {
CLILogger.fine("Computing hashes");

View File

@ -0,0 +1,121 @@
package net.sourceforge.filebot.subtitle;
import static java.lang.Math.*;
import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import net.sourceforge.filebot.vfs.MemoryFile;
/**
 * Static helpers for decoding subtitle files, exporting them to a target format / encoding and looking up
 * subtitle formats by file or by name.
 */
public final class SubtitleUtilities {

    /**
     * Detect charset and parse subtitle file even if extension is invalid.
     *
     * Every known format is tried; formats whose filter accepts the file name are tried first. The first
     * format whose reader reports at least one element is used to read the whole file.
     *
     * @param file in-memory subtitle file
     * @return the subtitle elements read by the first matching format
     * @throws IOException if no format's reader yields any element
     */
    public static List<SubtitleElement> decodeSubtitles(MemoryFile file) throws IOException {
        // gather all formats, put likely formats first
        LinkedList<SubtitleFormat> likelyFormats = new LinkedList<SubtitleFormat>();

        for (SubtitleFormat format : SubtitleFormat.values()) {
            if (format.getFilter().accept(file.getName())) {
                likelyFormats.addFirst(format);
            } else {
                likelyFormats.addLast(format);
            }
        }

        // decode bytes
        String textfile = getText(file.getData());

        // decode subtitle file with the first reader that seems to work
        for (SubtitleFormat format : likelyFormats) {
            // each attempt gets a fresh reader so that parsing starts at the beginning of the text
            SubtitleReader parser = format.newReader(new StringReader(textfile));

            if (parser.hasNext()) {
                // correct format found
                List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);

                // read subtitle file
                while (parser.hasNext()) {
                    list.add(parser.next());
                }

                return list;
            }
        }

        // unsupported subtitle format
        throw new IOException("Cannot read subtitle format");
    }


    /**
     * Convert a subtitle file to the given format and / or re-encode it with the given charset.
     *
     * @param data in-memory subtitle file
     * @param outputFormat target format; only <code>SubRip</code> is supported, <code>null</code> means
     *            that the text is kept as it is and only the encoding is changed
     * @param outputTimingOffset offset added to start and end of each element (clamped at 0); it is only
     *            applied when converting to SubRip and is ignored when only the encoding is changed
     * @param outputEncoding charset used to encode the result, must not be <code>null</code>
     * @return the encoded subtitle data
     * @throws IllegalArgumentException if the format is not supported or no encoding is given
     * @throws IOException if the subtitle data cannot be decoded
     */
    public static ByteBuffer exportSubtitles(MemoryFile data, SubtitleFormat outputFormat, long outputTimingOffset, Charset outputEncoding) throws IOException {
        if (outputFormat != null && outputFormat != SubtitleFormat.SubRip) {
            throw new IllegalArgumentException("Format not supported");
        }

        // fail fast with a clear message instead of a NullPointerException after all the decoding work is done
        if (outputEncoding == null) {
            throw new IllegalArgumentException("Output encoding must be defined");
        }

        // convert to target format and target encoding
        if (outputFormat == SubtitleFormat.SubRip) {
            // output buffer
            StringBuilder buffer = new StringBuilder(4 * 1024);
            SubRipWriter out = new SubRipWriter(buffer);

            for (SubtitleElement it : decodeSubtitles(data)) {
                if (outputTimingOffset != 0) {
                    it = new SubtitleElement(max(0, it.getStart() + outputTimingOffset), max(0, it.getEnd() + outputTimingOffset), it.getText());
                }

                out.write(it);
            }

            return outputEncoding.encode(CharBuffer.wrap(buffer));
        }

        // only change encoding
        return outputEncoding.encode(getText(data.getData()));
    }


    /**
     * Find the subtitle format of a file.
     *
     * @return the first format whose filter accepts the file, or <code>null</code> if there is none
     */
    public static SubtitleFormat getSubtitleFormat(File file) {
        for (SubtitleFormat it : SubtitleFormat.values()) {
            if (it.getFilter().accept(file)) {
                return it;
            }
        }

        return null;
    }


    /**
     * Find a subtitle format by its enum name (ignoring case) or by a file extension its filter accepts.
     *
     * @return the first matching format, or <code>null</code> if there is none
     */
    public static SubtitleFormat getSubtitleFormatByName(String name) {
        for (SubtitleFormat it : SubtitleFormat.values()) {
            // check by name
            if (it.name().equalsIgnoreCase(name)) {
                return it;
            }

            // check by extension
            if (it.getFilter().acceptExtension(name)) {
                return it;
            }
        }

        return null;
    }


    /**
     * Dummy constructor to prevent instantiation.
     */
    private SubtitleUtilities() {
        throw new UnsupportedOperationException();
    }

}

View File

@ -3,8 +3,8 @@ package net.sourceforge.filebot.ui.panel.subtitle;
import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
import static net.sourceforge.filebot.ui.NotificationLogging.*;
import static net.sourceforge.filebot.ui.panel.subtitle.SubtitleUtilities.*;
import static net.sourceforge.tuned.FileUtilities.*;
import static net.sourceforge.tuned.ui.TunedUtilities.*;
@ -327,12 +327,12 @@ class SubtitleDownloadComponent extends JComponent {
SubtitleFileChooser sf = new SubtitleFileChooser();
// normalize name and auto-adjust extension
String ext = sf.getSelectedFormat().getFilter().extensions()[0];
String ext = sf.getSelectedFormat().getFilter().extension();
String name = validateFileName(getNameWithoutExtension(file.getName()));
sf.setSelectedFile(new File(name + "." + ext));
if (sf.showSaveDialog(getWindow(this)) == JFileChooser.APPROVE_OPTION) {
exportSubtitles(decodeSubtitles(file), sf.getSelectedFile(), sf.getSelectedEncoding(), sf.getSelectedFormat(), sf.getTimingOffset());
writeFile(exportSubtitles(file, sf.getSelectedFormat(), sf.getTimingOffset(), sf.getSelectedEncoding()), sf.getSelectedFile());
}
} else {
// multiple files
@ -346,11 +346,11 @@ class SubtitleDownloadComponent extends JComponent {
MemoryFile file = (MemoryFile) object;
// normalize name and auto-adjust extension
String ext = sf.getSelectedFormat().getFilter().extensions()[0];
String ext = sf.getSelectedFormat().getFilter().extension();
String name = validateFileName(getNameWithoutExtension(file.getName()));
File destination = new File(folder, name + "." + ext);
exportSubtitles(decodeSubtitles(file), destination, sf.getSelectedEncoding(), sf.getSelectedFormat(), sf.getTimingOffset());
writeFile(exportSubtitles(file, sf.getSelectedFormat(), sf.getTimingOffset(), sf.getSelectedEncoding()), destination);
}
}
}

View File

@ -1,104 +0,0 @@
package net.sourceforge.filebot.ui.panel.subtitle;
import static java.lang.Math.*;
import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import com.ibm.icu.text.CharsetDetector;
import net.sourceforge.filebot.subtitle.SubRipWriter;
import net.sourceforge.filebot.subtitle.SubtitleElement;
import net.sourceforge.filebot.subtitle.SubtitleFormat;
import net.sourceforge.filebot.subtitle.SubtitleReader;
import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.tuned.ByteBufferInputStream;
final class SubtitleUtilities {
/**
* Detect charset and parse subtitle file even if extension is invalid
*/
public static List<SubtitleElement> decodeSubtitles(MemoryFile file) throws IOException {
// detect charset and read text content
CharsetDetector detector = new CharsetDetector();
detector.setDeclaredEncoding("UTF-8");
detector.enableInputFilter(true);
detector.setText(new ByteBufferInputStream(file.getData()));
String textfile = detector.detect().getString();
// gather all formats, put likely formats first
LinkedList<SubtitleFormat> priorityList = new LinkedList<SubtitleFormat>();
for (SubtitleFormat format : SubtitleFormat.values()) {
if (format.getFilter().accept(file.getName())) {
priorityList.addFirst(format);
} else {
priorityList.addLast(format);
}
}
// decode subtitle file with the first reader that seems to work
for (SubtitleFormat format : priorityList) {
// reset reader to position 0
SubtitleReader parser = format.newReader(new StringReader(textfile));
if (parser.hasNext()) {
// correct format found
List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);
// read subtitle file
while (parser.hasNext()) {
list.add(parser.next());
}
return list;
}
}
// unsupported subtitle format
throw new IOException("Cannot read subtitle format");
}
/**
* Write a subtitle file to disk
*/
public static void exportSubtitles(List<SubtitleElement> data, File destination, Charset encoding, SubtitleFormat format, long timingOffset) throws IOException {
if (format != SubtitleFormat.SubRip)
throw new IllegalArgumentException("Format not supported");
StringBuilder buffer = new StringBuilder(4 * 1024);
SubRipWriter out = new SubRipWriter(buffer);
for (SubtitleElement it : data) {
if (timingOffset != 0)
it = new SubtitleElement(max(0, it.getStart() + timingOffset), max(0, it.getEnd() + timingOffset), it.getText());
out.write(it);
}
// write to file
writeFile(encoding.encode(CharBuffer.wrap(buffer)), destination);
}
/**
* Dummy constructor to prevent instantiation.
*/
private SubtitleUtilities() {
throw new UnsupportedOperationException();
}
}

View File

@ -13,6 +13,7 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -99,6 +100,20 @@ public final class FileUtilities {
}
/**
 * Decode bytes to text using charset detection.
 *
 * @param data bytes to decode
 * @return the text as decoded by the detected charset, or decoded as UTF-8 if no charset was detected
 * @throws IOException if reading the data fails
 */
public static String getText(ByteBuffer data) throws IOException {
    final CharsetDetector charsetDetector = new CharsetDetector();

    // declare UTF-8 so that it is slightly favoured as the default encoding
    charsetDetector.setDeclaredEncoding("UTF-8");
    charsetDetector.setText(new ByteBufferInputStream(data));

    final CharsetMatch bestMatch = charsetDetector.detect();

    if (bestMatch == null) {
        // nothing detected, assume UTF-8
        return Charset.forName("UTF-8").decode(data).toString();
    }

    return bestMatch.getString();
}
/**
* Pattern used for matching file extensions.
*
@ -419,6 +434,11 @@ public final class FileUtilities {
}
/**
 * @return the first entry of the extensions array
 */
public String extension() {
    final String first = extensions[0];
    return first;
}
/**
 * @return a copy of the extensions array, so that callers cannot modify the internal array
 */
public String[] extensions() {
    final String[] copy = extensions.clone();
    return copy;
}