1
0
mirror of https://github.com/mitb-archive/filebot synced 2024-11-02 00:15:02 -04:00

Improved movie grouping for subtitle files

This commit is contained in:
Reinhard Pointner 2016-09-08 10:58:10 +08:00
parent 46a181b9b1
commit 81d9b6a2f6
3 changed files with 49 additions and 25 deletions

View File

@ -558,17 +558,18 @@ public class MediaBindingBean {
@Define("lang") @Define("lang")
public Language getLanguageTag() throws Exception { public Language getLanguageTag() throws Exception {
Locale languageSuffix = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile())); // grep language from filename
if (languageSuffix != null) { Locale languageTag = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile()));
return Language.getLanguage(languageSuffix); if (languageTag != null) {
return Language.getLanguage(languageTag);
} }
// try to auto-detect subtitle language // detect language from subtitle text content
if (SUBTITLE_FILES.accept(getMediaFile())) { if (SUBTITLE_FILES.accept(getMediaFile())) {
try { try {
return Language.getLanguage(detectSubtitleLanguage(getMediaFile())); return detectSubtitleLanguage(getMediaFile());
} catch (Throwable e) { } catch (Exception e) {
throw new RuntimeException("Failed to auto-detect subtitle language: " + e, e); throw new RuntimeException("Failed to detect subtitle language: " + e, e);
} }
} }

View File

@ -9,6 +9,7 @@ import static net.filebot.MediaTypes.*;
import static net.filebot.media.XattrMetaInfo.*; import static net.filebot.media.XattrMetaInfo.*;
import static net.filebot.similarity.CommonSequenceMatcher.*; import static net.filebot.similarity.CommonSequenceMatcher.*;
import static net.filebot.similarity.Normalization.*; import static net.filebot.similarity.Normalization.*;
import static net.filebot.subtitle.SubtitleUtilities.*;
import static net.filebot.util.FileUtilities.*; import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.RegularExpressions.*; import static net.filebot.util.RegularExpressions.*;
import static net.filebot.util.StringUtilities.*; import static net.filebot.util.StringUtilities.*;
@ -43,6 +44,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.filebot.ApplicationFolder; import net.filebot.ApplicationFolder;
import net.filebot.Language;
import net.filebot.Resource; import net.filebot.Resource;
import net.filebot.WebServices; import net.filebot.WebServices;
import net.filebot.archive.Archive; import net.filebot.archive.Archive;
@ -1107,6 +1109,15 @@ public class MediaDetection {
} catch (Exception e) { } catch (Exception e) {
debug.warning(format("Failed to read media characteristics: %s", e.getMessage())); debug.warning(format("Failed to read media characteristics: %s", e.getMessage()));
} }
} else if (SUBTITLE_FILES.accept(f) && f.length() > ONE_KILOBYTE) {
try {
Language language = detectSubtitleLanguage(f);
if (language != null) {
return asList(language.getCode());
}
} catch (Exception e) {
debug.warning(format("Failed to detect subtitle language: %s", e.getMessage()));
}
} }
return emptyList(); return emptyList();
})).forEach((group, videos) -> groups.add(videos)); })).forEach((group, videos) -> groups.add(videos));

View File

@ -33,6 +33,15 @@ import java.util.function.Predicate;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import com.optimaize.langdetect.DetectedLanguage;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.BuiltInLanguages;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import net.filebot.Language; import net.filebot.Language;
import net.filebot.similarity.EpisodeMetrics; import net.filebot.similarity.EpisodeMetrics;
import net.filebot.similarity.Match; import net.filebot.similarity.Match;
@ -53,15 +62,6 @@ import net.filebot.web.SubtitleProvider;
import net.filebot.web.SubtitleSearchResult; import net.filebot.web.SubtitleSearchResult;
import net.filebot.web.VideoHashSubtitleService; import net.filebot.web.VideoHashSubtitleService;
import com.optimaize.langdetect.DetectedLanguage;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.BuiltInLanguages;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
public final class SubtitleUtilities { public final class SubtitleUtilities {
public static Map<File, List<SubtitleDescriptor>> lookupSubtitlesByHash(VideoHashSubtitleService service, Collection<File> files, String languageName, boolean addOptions, boolean strict) throws Exception { public static Map<File, List<SubtitleDescriptor>> lookupSubtitlesByHash(VideoHashSubtitleService service, Collection<File> files, String languageName, boolean addOptions, boolean strict) throws Exception {
@ -437,19 +437,31 @@ public final class SubtitleUtilities {
return new MemoryFile(descriptor.getPath(), data); return new MemoryFile(descriptor.getPath(), data);
} }
public static String detectSubtitleLanguage(File file) throws IOException { public static Language detectSubtitleLanguage(File file) throws IOException {
MemoryFile subtitleFile = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file))); // grep language from filename
String subtitleText = decodeSubtitles(subtitleFile).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n")); Locale languageTag = releaseInfo.getSubtitleLanguageTag(getName(file));
if (languageTag != null) {
// detect language return Language.getLanguage(languageTag);
List<DetectedLanguage> probabilities = createLanguageDetector().getProbabilities(subtitleText);
if (probabilities.size() > 0) {
return probabilities.get(0).getLocale().getLanguage();
} }
// detect language from subtitle text content
MemoryFile data = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
List<DetectedLanguage> options = detectSubtitleLanguage(data);
if (options.size() > 0) {
return Language.getLanguage(options.get(0).getLocale().getLanguage());
}
return null; return null;
} }
/**
 * Detects the language of an in-memory subtitle file from its text content.
 *
 * The subtitle data is decoded, the text of every subtitle element is joined
 * with newline separators, and the combined text is handed to a language
 * detector obtained from {@code createLanguageDetector()}.
 *
 * @param file the in-memory subtitle file to decode
 * @return the detected language candidates exactly as returned by the
 *         detector's {@code getProbabilities} call
 * @throws IOException if the language detector cannot be created
 *         (presumably also if decoding fails; confirm against decodeSubtitles)
 */
public static List<DetectedLanguage> detectSubtitleLanguage(MemoryFile file) throws IOException {
	// collect the text of all decoded subtitle elements, one element per line
	String content = decodeSubtitles(file)
			.stream()
			.map(element -> element.getText())
			.collect(Collectors.joining("\n"));

	// run text language detection on the combined subtitle text
	LanguageDetector detector = createLanguageDetector();
	return detector.getProbabilities(content);
}
private static LanguageDetectorBuilder languageDetector; private static LanguageDetectorBuilder languageDetector;
private static LanguageDetector createLanguageDetector() throws IOException { private static LanguageDetector createLanguageDetector() throws IOException {