1
0
mirror of https://github.com/mitb-archive/filebot synced 2025-01-10 21:38:04 -05:00

Improved movie grouping for subtitle files

This commit is contained in:
Reinhard Pointner 2016-09-08 10:58:10 +08:00
parent 46a181b9b1
commit 81d9b6a2f6
3 changed files with 49 additions and 25 deletions

View File

@ -558,17 +558,18 @@ public class MediaBindingBean {
@Define("lang")
public Language getLanguageTag() throws Exception {
Locale languageSuffix = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile()));
if (languageSuffix != null) {
return Language.getLanguage(languageSuffix);
// grep language from filename
Locale languageTag = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile()));
if (languageTag != null) {
return Language.getLanguage(languageTag);
}
// try to auto-detect subtitle language
// detect language from subtitle text content
if (SUBTITLE_FILES.accept(getMediaFile())) {
try {
return Language.getLanguage(detectSubtitleLanguage(getMediaFile()));
} catch (Throwable e) {
throw new RuntimeException("Failed to auto-detect subtitle language: " + e, e);
return detectSubtitleLanguage(getMediaFile());
} catch (Exception e) {
throw new RuntimeException("Failed to detect subtitle language: " + e, e);
}
}

View File

@ -9,6 +9,7 @@ import static net.filebot.MediaTypes.*;
import static net.filebot.media.XattrMetaInfo.*;
import static net.filebot.similarity.CommonSequenceMatcher.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.subtitle.SubtitleUtilities.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.RegularExpressions.*;
import static net.filebot.util.StringUtilities.*;
@ -43,6 +44,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.filebot.ApplicationFolder;
import net.filebot.Language;
import net.filebot.Resource;
import net.filebot.WebServices;
import net.filebot.archive.Archive;
@ -1107,6 +1109,15 @@ public class MediaDetection {
} catch (Exception e) {
debug.warning(format("Failed to read media characteristics: %s", e.getMessage()));
}
} else if (SUBTITLE_FILES.accept(f) && f.length() > ONE_KILOBYTE) {
try {
Language language = detectSubtitleLanguage(f);
if (language != null) {
return asList(language.getCode());
}
} catch (Exception e) {
debug.warning(format("Failed to detect subtitle language: %s", e.getMessage()));
}
}
return emptyList();
})).forEach((group, videos) -> groups.add(videos));

View File

@ -33,6 +33,15 @@ import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.optimaize.langdetect.DetectedLanguage;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.BuiltInLanguages;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import net.filebot.Language;
import net.filebot.similarity.EpisodeMetrics;
import net.filebot.similarity.Match;
@ -53,15 +62,6 @@ import net.filebot.web.SubtitleProvider;
import net.filebot.web.SubtitleSearchResult;
import net.filebot.web.VideoHashSubtitleService;
import com.optimaize.langdetect.DetectedLanguage;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.BuiltInLanguages;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
public final class SubtitleUtilities {
public static Map<File, List<SubtitleDescriptor>> lookupSubtitlesByHash(VideoHashSubtitleService service, Collection<File> files, String languageName, boolean addOptions, boolean strict) throws Exception {
@ -437,19 +437,31 @@ public final class SubtitleUtilities {
return new MemoryFile(descriptor.getPath(), data);
}
public static String detectSubtitleLanguage(File file) throws IOException {
MemoryFile subtitleFile = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
String subtitleText = decodeSubtitles(subtitleFile).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n"));
// detect language
List<DetectedLanguage> probabilities = createLanguageDetector().getProbabilities(subtitleText);
if (probabilities.size() > 0) {
return probabilities.get(0).getLocale().getLanguage();
/**
 * Determines the language of a subtitle file.
 *
 * The file name is checked first: if releaseInfo.getSubtitleLanguageTag finds
 * a language tag in the name, the matching Language is returned without
 * reading the file. Otherwise the file is loaded into memory and the
 * content-based overload detectSubtitleLanguage(MemoryFile) is consulted.
 *
 * @param file the subtitle file to inspect
 * @return the Language looked up for the file name tag, or for the first
 *         detected option; null when the detector returns no options
 * @throws IOException declared by the signature; presumably raised when the
 *         file cannot be read or decoded (confirm against readFile and
 *         decodeSubtitles)
 */
public static Language detectSubtitleLanguage(File file) throws IOException {
// grep language from filename
Locale languageTag = releaseInfo.getSubtitleLanguageTag(getName(file));
if (languageTag != null) {
return Language.getLanguage(languageTag);
}
// detect language from subtitle text content
MemoryFile data = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
List<DetectedLanguage> options = detectSubtitleLanguage(data);
if (options.size() > 0) {
// NOTE(review): takes the first option, presumably the most probable one; confirm the ordering of getProbabilities
return Language.getLanguage(options.get(0).getLocale().getLanguage());
}
// neither the file name nor the text content yielded a language
return null;
}
/**
 * Runs language detection on the text content of in-memory subtitle data.
 *
 * The subtitle data is decoded into subtitle elements, the text of every
 * element is joined with newlines into a single string, and that string is
 * handed to the language detector.
 *
 * @param file the in-memory subtitle data to decode
 * @return the list returned by the detector's getProbabilities call for the
 *         joined subtitle text
 * @throws IOException declared by the signature; presumably raised when the
 *         subtitles cannot be decoded or the detector cannot be created
 *         (confirm against decodeSubtitles and createLanguageDetector)
 */
public static List<DetectedLanguage> detectSubtitleLanguage(MemoryFile file) throws IOException {
// decode subtitles
String text = decodeSubtitles(file).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n"));
// detect text language
return createLanguageDetector().getProbabilities(text);
}
private static LanguageDetectorBuilder languageDetector;
private static LanguageDetector createLanguageDetector() throws IOException {