mirror of
https://github.com/mitb-archive/filebot
synced 2024-11-02 00:15:02 -04:00
Improved movie grouping for subtitle files
This commit is contained in:
parent
46a181b9b1
commit
81d9b6a2f6
@ -558,17 +558,18 @@ public class MediaBindingBean {
|
||||
|
||||
@Define("lang")
|
||||
public Language getLanguageTag() throws Exception {
|
||||
Locale languageSuffix = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile()));
|
||||
if (languageSuffix != null) {
|
||||
return Language.getLanguage(languageSuffix);
|
||||
// grep language from filename
|
||||
Locale languageTag = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile()));
|
||||
if (languageTag != null) {
|
||||
return Language.getLanguage(languageTag);
|
||||
}
|
||||
|
||||
// try to auto-detect subtitle language
|
||||
// detect language from subtitle text content
|
||||
if (SUBTITLE_FILES.accept(getMediaFile())) {
|
||||
try {
|
||||
return Language.getLanguage(detectSubtitleLanguage(getMediaFile()));
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException("Failed to auto-detect subtitle language: " + e, e);
|
||||
return detectSubtitleLanguage(getMediaFile());
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Failed to detect subtitle language: " + e, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ import static net.filebot.MediaTypes.*;
|
||||
import static net.filebot.media.XattrMetaInfo.*;
|
||||
import static net.filebot.similarity.CommonSequenceMatcher.*;
|
||||
import static net.filebot.similarity.Normalization.*;
|
||||
import static net.filebot.subtitle.SubtitleUtilities.*;
|
||||
import static net.filebot.util.FileUtilities.*;
|
||||
import static net.filebot.util.RegularExpressions.*;
|
||||
import static net.filebot.util.StringUtilities.*;
|
||||
@ -43,6 +44,7 @@ import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.filebot.ApplicationFolder;
|
||||
import net.filebot.Language;
|
||||
import net.filebot.Resource;
|
||||
import net.filebot.WebServices;
|
||||
import net.filebot.archive.Archive;
|
||||
@ -1107,6 +1109,15 @@ public class MediaDetection {
|
||||
} catch (Exception e) {
|
||||
debug.warning(format("Failed to read media characteristics: %s", e.getMessage()));
|
||||
}
|
||||
} else if (SUBTITLE_FILES.accept(f) && f.length() > ONE_KILOBYTE) {
|
||||
try {
|
||||
Language language = detectSubtitleLanguage(f);
|
||||
if (language != null) {
|
||||
return asList(language.getCode());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
debug.warning(format("Failed to detect subtitle language: %s", e.getMessage()));
|
||||
}
|
||||
}
|
||||
return emptyList();
|
||||
})).forEach((group, videos) -> groups.add(videos));
|
||||
|
@ -33,6 +33,15 @@ import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.optimaize.langdetect.DetectedLanguage;
|
||||
import com.optimaize.langdetect.LanguageDetector;
|
||||
import com.optimaize.langdetect.LanguageDetectorBuilder;
|
||||
import com.optimaize.langdetect.i18n.LdLocale;
|
||||
import com.optimaize.langdetect.ngram.NgramExtractors;
|
||||
import com.optimaize.langdetect.profiles.BuiltInLanguages;
|
||||
import com.optimaize.langdetect.profiles.LanguageProfile;
|
||||
import com.optimaize.langdetect.profiles.LanguageProfileReader;
|
||||
|
||||
import net.filebot.Language;
|
||||
import net.filebot.similarity.EpisodeMetrics;
|
||||
import net.filebot.similarity.Match;
|
||||
@ -53,15 +62,6 @@ import net.filebot.web.SubtitleProvider;
|
||||
import net.filebot.web.SubtitleSearchResult;
|
||||
import net.filebot.web.VideoHashSubtitleService;
|
||||
|
||||
import com.optimaize.langdetect.DetectedLanguage;
|
||||
import com.optimaize.langdetect.LanguageDetector;
|
||||
import com.optimaize.langdetect.LanguageDetectorBuilder;
|
||||
import com.optimaize.langdetect.i18n.LdLocale;
|
||||
import com.optimaize.langdetect.ngram.NgramExtractors;
|
||||
import com.optimaize.langdetect.profiles.BuiltInLanguages;
|
||||
import com.optimaize.langdetect.profiles.LanguageProfile;
|
||||
import com.optimaize.langdetect.profiles.LanguageProfileReader;
|
||||
|
||||
public final class SubtitleUtilities {
|
||||
|
||||
public static Map<File, List<SubtitleDescriptor>> lookupSubtitlesByHash(VideoHashSubtitleService service, Collection<File> files, String languageName, boolean addOptions, boolean strict) throws Exception {
|
||||
@ -437,19 +437,31 @@ public final class SubtitleUtilities {
|
||||
return new MemoryFile(descriptor.getPath(), data);
|
||||
}
|
||||
|
||||
public static String detectSubtitleLanguage(File file) throws IOException {
|
||||
MemoryFile subtitleFile = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
|
||||
String subtitleText = decodeSubtitles(subtitleFile).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n"));
|
||||
|
||||
// detect language
|
||||
List<DetectedLanguage> probabilities = createLanguageDetector().getProbabilities(subtitleText);
|
||||
|
||||
if (probabilities.size() > 0) {
|
||||
return probabilities.get(0).getLocale().getLanguage();
|
||||
public static Language detectSubtitleLanguage(File file) throws IOException {
|
||||
// grep language from filename
|
||||
Locale languageTag = releaseInfo.getSubtitleLanguageTag(getName(file));
|
||||
if (languageTag != null) {
|
||||
return Language.getLanguage(languageTag);
|
||||
}
|
||||
|
||||
// detect language from subtitle text content
|
||||
MemoryFile data = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
|
||||
List<DetectedLanguage> options = detectSubtitleLanguage(data);
|
||||
if (options.size() > 0) {
|
||||
return Language.getLanguage(options.get(0).getLocale().getLanguage());
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static List<DetectedLanguage> detectSubtitleLanguage(MemoryFile file) throws IOException {
|
||||
// decode subtitles
|
||||
String text = decodeSubtitles(file).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n"));
|
||||
|
||||
// detect text language
|
||||
return createLanguageDetector().getProbabilities(text);
|
||||
}
|
||||
|
||||
private static LanguageDetectorBuilder languageDetector;
|
||||
|
||||
private static LanguageDetector createLanguageDetector() throws IOException {
|
||||
|
Loading…
Reference in New Issue
Block a user