2011-09-14 14:13:34 -04:00
|
|
|
|
|
|
|
package net.sourceforge.filebot.subtitle;
|
|
|
|
|
|
|
|
|
|
|
|
import static java.lang.Math.*;
|
2012-07-16 06:09:21 -04:00
|
|
|
import static java.util.Arrays.*;
|
|
|
|
import static java.util.Collections.*;
|
2011-11-25 13:52:31 -05:00
|
|
|
import static net.sourceforge.filebot.MediaTypes.*;
|
2012-07-16 06:09:21 -04:00
|
|
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
2011-09-14 14:13:34 -04:00
|
|
|
import static net.sourceforge.tuned.FileUtilities.*;
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.StringReader;
|
|
|
|
import java.nio.ByteBuffer;
|
|
|
|
import java.nio.CharBuffer;
|
|
|
|
import java.nio.charset.Charset;
|
|
|
|
import java.util.ArrayList;
|
2011-11-25 13:52:31 -05:00
|
|
|
import java.util.Collection;
|
|
|
|
import java.util.HashSet;
|
|
|
|
import java.util.Iterator;
|
2012-07-16 06:09:21 -04:00
|
|
|
import java.util.LinkedHashMap;
|
2011-11-25 13:52:31 -05:00
|
|
|
import java.util.LinkedHashSet;
|
2011-09-14 14:13:34 -04:00
|
|
|
import java.util.LinkedList;
|
|
|
|
import java.util.List;
|
2012-07-16 06:09:21 -04:00
|
|
|
import java.util.Map;
|
2011-11-25 13:52:31 -05:00
|
|
|
import java.util.Set;
|
2011-09-14 14:13:34 -04:00
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
import net.sourceforge.filebot.similarity.EpisodeMetrics;
|
|
|
|
import net.sourceforge.filebot.similarity.Match;
|
|
|
|
import net.sourceforge.filebot.similarity.Matcher;
|
|
|
|
import net.sourceforge.filebot.similarity.MetricAvg;
|
2011-11-25 13:52:31 -05:00
|
|
|
import net.sourceforge.filebot.similarity.NameSimilarityMetric;
|
2012-07-16 06:09:21 -04:00
|
|
|
import net.sourceforge.filebot.similarity.SequenceMatchSimilarity;
|
2011-11-25 13:52:31 -05:00
|
|
|
import net.sourceforge.filebot.similarity.SimilarityMetric;
|
|
|
|
import net.sourceforge.filebot.ui.Language;
|
|
|
|
import net.sourceforge.filebot.vfs.ArchiveType;
|
2011-09-14 14:13:34 -04:00
|
|
|
import net.sourceforge.filebot.vfs.MemoryFile;
|
2011-11-25 13:52:31 -05:00
|
|
|
import net.sourceforge.filebot.web.SearchResult;
|
|
|
|
import net.sourceforge.filebot.web.SubtitleDescriptor;
|
|
|
|
import net.sourceforge.filebot.web.SubtitleProvider;
|
2011-09-14 14:13:34 -04:00
|
|
|
|
|
|
|
|
|
|
|
public final class SubtitleUtilities {
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
|
|
|
|
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
|
|
|
|
|
|
|
SimilarityMetric[] metrics = EpisodeMetrics.defaultSequence(false);
|
|
|
|
|
|
|
|
// optimize for generic media <-> subtitle matching
|
|
|
|
replaceAll(asList(metrics), EpisodeMetrics.SubstringFields, EpisodeMetrics.SubstringSequence);
|
|
|
|
|
|
|
|
// first match everything as best as possible, then filter possibly bad matches
|
|
|
|
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
|
|
|
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
|
|
|
|
|
|
|
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
|
|
|
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.5f)) {
|
|
|
|
subtitleByVideo.put(it.getValue(), it.getCandidate());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return subtitleByVideo;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-25 13:52:31 -05:00
|
|
|
public static List<SubtitleDescriptor> findSubtitles(SubtitleProvider service, Collection<String> querySet, String languageName) throws Exception {
|
|
|
|
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>();
|
|
|
|
|
|
|
|
// search for and automatically select movie / show entry
|
|
|
|
Set<SearchResult> resultSet = new HashSet<SearchResult>();
|
|
|
|
for (String query : querySet) {
|
2012-07-16 06:09:21 -04:00
|
|
|
resultSet.addAll(findProbableSearchResults(query, service.search(query)));
|
2011-11-25 13:52:31 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// fetch subtitles for all search results
|
|
|
|
for (SearchResult it : resultSet) {
|
|
|
|
subtitles.addAll(service.getSubtitleList(it, languageName));
|
|
|
|
}
|
|
|
|
|
|
|
|
return subtitles;
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
|
|
|
protected static Collection<SearchResult> findProbableSearchResults(String query, Iterable<? extends SearchResult> searchResults) {
|
2011-11-25 13:52:31 -05:00
|
|
|
// auto-select most probable search result
|
|
|
|
Set<SearchResult> probableMatches = new LinkedHashSet<SearchResult>();
|
|
|
|
|
|
|
|
// use name similarity metric
|
2012-07-16 06:09:21 -04:00
|
|
|
SimilarityMetric metric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric());
|
2011-11-25 13:52:31 -05:00
|
|
|
|
|
|
|
// find probable matches using name similarity > threshold
|
|
|
|
for (SearchResult result : searchResults) {
|
2012-07-16 06:09:21 -04:00
|
|
|
if (metric.getSimilarity(query, removeTrailingBrackets(result.getName())) > 0.8f) {
|
2011-11-25 13:52:31 -05:00
|
|
|
probableMatches.add(result);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return probableMatches;
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
2011-09-14 14:13:34 -04:00
|
|
|
/**
|
|
|
|
* Detect charset and parse subtitle file even if extension is invalid
|
|
|
|
*/
|
|
|
|
public static List<SubtitleElement> decodeSubtitles(MemoryFile file) throws IOException {
|
|
|
|
// gather all formats, put likely formats first
|
|
|
|
LinkedList<SubtitleFormat> likelyFormats = new LinkedList<SubtitleFormat>();
|
|
|
|
|
|
|
|
for (SubtitleFormat format : SubtitleFormat.values()) {
|
|
|
|
if (format.getFilter().accept(file.getName()))
|
|
|
|
likelyFormats.addFirst(format);
|
|
|
|
else
|
|
|
|
likelyFormats.addLast(format);
|
|
|
|
}
|
|
|
|
|
|
|
|
// decode bytes
|
|
|
|
String textfile = getText(file.getData());
|
|
|
|
|
|
|
|
// decode subtitle file with the first reader that seems to work
|
|
|
|
for (SubtitleFormat format : likelyFormats) {
|
|
|
|
// reset reader to position 0
|
|
|
|
SubtitleReader parser = format.newReader(new StringReader(textfile));
|
|
|
|
|
|
|
|
if (parser.hasNext()) {
|
|
|
|
// correct format found
|
|
|
|
List<SubtitleElement> list = new ArrayList<SubtitleElement>(500);
|
|
|
|
|
|
|
|
// read subtitle file
|
|
|
|
while (parser.hasNext()) {
|
|
|
|
list.add(parser.next());
|
|
|
|
}
|
|
|
|
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// unsupported subtitle format
|
|
|
|
throw new IOException("Cannot read subtitle format");
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
2011-09-14 14:13:34 -04:00
|
|
|
public static ByteBuffer exportSubtitles(MemoryFile data, SubtitleFormat outputFormat, long outputTimingOffset, Charset outputEncoding) throws IOException {
|
|
|
|
if (outputFormat != null && outputFormat != SubtitleFormat.SubRip) {
|
|
|
|
throw new IllegalArgumentException("Format not supported");
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert to target format and target encoding
|
|
|
|
if (outputFormat == SubtitleFormat.SubRip) {
|
|
|
|
// output buffer
|
|
|
|
StringBuilder buffer = new StringBuilder(4 * 1024);
|
|
|
|
SubRipWriter out = new SubRipWriter(buffer);
|
|
|
|
|
|
|
|
for (SubtitleElement it : decodeSubtitles(data)) {
|
|
|
|
if (outputTimingOffset != 0)
|
|
|
|
it = new SubtitleElement(max(0, it.getStart() + outputTimingOffset), max(0, it.getEnd() + outputTimingOffset), it.getText());
|
|
|
|
|
|
|
|
out.write(it);
|
|
|
|
}
|
|
|
|
|
|
|
|
return outputEncoding.encode(CharBuffer.wrap(buffer));
|
|
|
|
}
|
|
|
|
|
|
|
|
// only change encoding
|
|
|
|
return outputEncoding.encode(getText(data.getData()));
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
2011-09-14 14:13:34 -04:00
|
|
|
public static SubtitleFormat getSubtitleFormat(File file) {
|
|
|
|
for (SubtitleFormat it : SubtitleFormat.values()) {
|
|
|
|
if (it.getFilter().accept(file))
|
|
|
|
return it;
|
|
|
|
}
|
|
|
|
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
2011-09-14 14:13:34 -04:00
|
|
|
public static SubtitleFormat getSubtitleFormatByName(String name) {
|
|
|
|
for (SubtitleFormat it : SubtitleFormat.values()) {
|
|
|
|
// check by name
|
|
|
|
if (it.name().equalsIgnoreCase(name))
|
|
|
|
return it;
|
|
|
|
|
|
|
|
// check by extension
|
|
|
|
if (it.getFilter().acceptExtension(name))
|
|
|
|
return it;
|
|
|
|
}
|
|
|
|
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
2011-11-25 13:52:31 -05:00
|
|
|
public static String formatSubtitle(String name, String languageName, String type) {
|
|
|
|
StringBuilder sb = new StringBuilder(name);
|
|
|
|
|
|
|
|
if (languageName != null) {
|
|
|
|
String lang = Language.getISO3LanguageCodeByName(languageName);
|
|
|
|
|
|
|
|
if (lang == null) {
|
|
|
|
// we probably won't get here, but just in case
|
|
|
|
lang = languageName.replaceAll("\\W", "");
|
|
|
|
}
|
|
|
|
|
|
|
|
sb.append('.').append(lang);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (type != null) {
|
|
|
|
sb.append('.').append(type);
|
|
|
|
}
|
|
|
|
|
|
|
|
return sb.toString();
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
2011-11-25 13:52:31 -05:00
|
|
|
public static MemoryFile fetchSubtitle(SubtitleDescriptor descriptor) throws Exception {
|
|
|
|
ByteBuffer data = descriptor.fetch();
|
|
|
|
|
|
|
|
// extract subtitles from archive
|
|
|
|
ArchiveType type = ArchiveType.forName(descriptor.getType());
|
|
|
|
|
|
|
|
if (type != ArchiveType.UNKOWN) {
|
|
|
|
// extract subtitle from archive
|
|
|
|
Iterator<MemoryFile> it = type.fromData(data).iterator();
|
|
|
|
while (it.hasNext()) {
|
|
|
|
MemoryFile entry = it.next();
|
|
|
|
if (SUBTITLE_FILES.accept(entry.getName())) {
|
|
|
|
return entry;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// assume that the fetched data is the subtitle
|
|
|
|
return new MemoryFile(descriptor.getPath(), data);
|
|
|
|
}
|
|
|
|
|
2012-07-16 06:09:21 -04:00
|
|
|
|
2011-09-14 14:13:34 -04:00
|
|
|
/**
|
|
|
|
* Dummy constructor to prevent instantiation.
|
|
|
|
*/
|
|
|
|
private SubtitleUtilities() {
|
|
|
|
throw new UnsupportedOperationException();
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|