mirror of
https://github.com/mitb-archive/filebot
synced 2024-11-02 00:15:02 -04:00
Improved movie grouping for subtitle files
This commit is contained in:
parent
46a181b9b1
commit
81d9b6a2f6
@ -558,17 +558,18 @@ public class MediaBindingBean {
|
|||||||
|
|
||||||
@Define("lang")
|
@Define("lang")
|
||||||
public Language getLanguageTag() throws Exception {
|
public Language getLanguageTag() throws Exception {
|
||||||
Locale languageSuffix = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile()));
|
// grep language from filename
|
||||||
if (languageSuffix != null) {
|
Locale languageTag = releaseInfo.getSubtitleLanguageTag(getFileNames(getMediaFile()));
|
||||||
return Language.getLanguage(languageSuffix);
|
if (languageTag != null) {
|
||||||
|
return Language.getLanguage(languageTag);
|
||||||
}
|
}
|
||||||
|
|
||||||
// try to auto-detect subtitle language
|
// detect language from subtitle text content
|
||||||
if (SUBTITLE_FILES.accept(getMediaFile())) {
|
if (SUBTITLE_FILES.accept(getMediaFile())) {
|
||||||
try {
|
try {
|
||||||
return Language.getLanguage(detectSubtitleLanguage(getMediaFile()));
|
return detectSubtitleLanguage(getMediaFile());
|
||||||
} catch (Throwable e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException("Failed to auto-detect subtitle language: " + e, e);
|
throw new RuntimeException("Failed to detect subtitle language: " + e, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ import static net.filebot.MediaTypes.*;
|
|||||||
import static net.filebot.media.XattrMetaInfo.*;
|
import static net.filebot.media.XattrMetaInfo.*;
|
||||||
import static net.filebot.similarity.CommonSequenceMatcher.*;
|
import static net.filebot.similarity.CommonSequenceMatcher.*;
|
||||||
import static net.filebot.similarity.Normalization.*;
|
import static net.filebot.similarity.Normalization.*;
|
||||||
|
import static net.filebot.subtitle.SubtitleUtilities.*;
|
||||||
import static net.filebot.util.FileUtilities.*;
|
import static net.filebot.util.FileUtilities.*;
|
||||||
import static net.filebot.util.RegularExpressions.*;
|
import static net.filebot.util.RegularExpressions.*;
|
||||||
import static net.filebot.util.StringUtilities.*;
|
import static net.filebot.util.StringUtilities.*;
|
||||||
@ -43,6 +44,7 @@ import java.util.regex.Matcher;
|
|||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import net.filebot.ApplicationFolder;
|
import net.filebot.ApplicationFolder;
|
||||||
|
import net.filebot.Language;
|
||||||
import net.filebot.Resource;
|
import net.filebot.Resource;
|
||||||
import net.filebot.WebServices;
|
import net.filebot.WebServices;
|
||||||
import net.filebot.archive.Archive;
|
import net.filebot.archive.Archive;
|
||||||
@ -1107,6 +1109,15 @@ public class MediaDetection {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
debug.warning(format("Failed to read media characteristics: %s", e.getMessage()));
|
debug.warning(format("Failed to read media characteristics: %s", e.getMessage()));
|
||||||
}
|
}
|
||||||
|
} else if (SUBTITLE_FILES.accept(f) && f.length() > ONE_KILOBYTE) {
|
||||||
|
try {
|
||||||
|
Language language = detectSubtitleLanguage(f);
|
||||||
|
if (language != null) {
|
||||||
|
return asList(language.getCode());
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
debug.warning(format("Failed to detect subtitle language: %s", e.getMessage()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return emptyList();
|
return emptyList();
|
||||||
})).forEach((group, videos) -> groups.add(videos));
|
})).forEach((group, videos) -> groups.add(videos));
|
||||||
|
@ -33,6 +33,15 @@ import java.util.function.Predicate;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import com.optimaize.langdetect.DetectedLanguage;
|
||||||
|
import com.optimaize.langdetect.LanguageDetector;
|
||||||
|
import com.optimaize.langdetect.LanguageDetectorBuilder;
|
||||||
|
import com.optimaize.langdetect.i18n.LdLocale;
|
||||||
|
import com.optimaize.langdetect.ngram.NgramExtractors;
|
||||||
|
import com.optimaize.langdetect.profiles.BuiltInLanguages;
|
||||||
|
import com.optimaize.langdetect.profiles.LanguageProfile;
|
||||||
|
import com.optimaize.langdetect.profiles.LanguageProfileReader;
|
||||||
|
|
||||||
import net.filebot.Language;
|
import net.filebot.Language;
|
||||||
import net.filebot.similarity.EpisodeMetrics;
|
import net.filebot.similarity.EpisodeMetrics;
|
||||||
import net.filebot.similarity.Match;
|
import net.filebot.similarity.Match;
|
||||||
@ -53,15 +62,6 @@ import net.filebot.web.SubtitleProvider;
|
|||||||
import net.filebot.web.SubtitleSearchResult;
|
import net.filebot.web.SubtitleSearchResult;
|
||||||
import net.filebot.web.VideoHashSubtitleService;
|
import net.filebot.web.VideoHashSubtitleService;
|
||||||
|
|
||||||
import com.optimaize.langdetect.DetectedLanguage;
|
|
||||||
import com.optimaize.langdetect.LanguageDetector;
|
|
||||||
import com.optimaize.langdetect.LanguageDetectorBuilder;
|
|
||||||
import com.optimaize.langdetect.i18n.LdLocale;
|
|
||||||
import com.optimaize.langdetect.ngram.NgramExtractors;
|
|
||||||
import com.optimaize.langdetect.profiles.BuiltInLanguages;
|
|
||||||
import com.optimaize.langdetect.profiles.LanguageProfile;
|
|
||||||
import com.optimaize.langdetect.profiles.LanguageProfileReader;
|
|
||||||
|
|
||||||
public final class SubtitleUtilities {
|
public final class SubtitleUtilities {
|
||||||
|
|
||||||
public static Map<File, List<SubtitleDescriptor>> lookupSubtitlesByHash(VideoHashSubtitleService service, Collection<File> files, String languageName, boolean addOptions, boolean strict) throws Exception {
|
public static Map<File, List<SubtitleDescriptor>> lookupSubtitlesByHash(VideoHashSubtitleService service, Collection<File> files, String languageName, boolean addOptions, boolean strict) throws Exception {
|
||||||
@ -437,19 +437,31 @@ public final class SubtitleUtilities {
|
|||||||
return new MemoryFile(descriptor.getPath(), data);
|
return new MemoryFile(descriptor.getPath(), data);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String detectSubtitleLanguage(File file) throws IOException {
|
public static Language detectSubtitleLanguage(File file) throws IOException {
|
||||||
MemoryFile subtitleFile = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
|
// grep language from filename
|
||||||
String subtitleText = decodeSubtitles(subtitleFile).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n"));
|
Locale languageTag = releaseInfo.getSubtitleLanguageTag(getName(file));
|
||||||
|
if (languageTag != null) {
|
||||||
// detect language
|
return Language.getLanguage(languageTag);
|
||||||
List<DetectedLanguage> probabilities = createLanguageDetector().getProbabilities(subtitleText);
|
|
||||||
|
|
||||||
if (probabilities.size() > 0) {
|
|
||||||
return probabilities.get(0).getLocale().getLanguage();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// detect language from subtitle text content
|
||||||
|
MemoryFile data = new MemoryFile(file.getName(), ByteBuffer.wrap(readFile(file)));
|
||||||
|
List<DetectedLanguage> options = detectSubtitleLanguage(data);
|
||||||
|
if (options.size() > 0) {
|
||||||
|
return Language.getLanguage(options.get(0).getLocale().getLanguage());
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static List<DetectedLanguage> detectSubtitleLanguage(MemoryFile file) throws IOException {
|
||||||
|
// decode subtitles
|
||||||
|
String text = decodeSubtitles(file).stream().map(SubtitleElement::getText).collect(Collectors.joining("\n"));
|
||||||
|
|
||||||
|
// detect text language
|
||||||
|
return createLanguageDetector().getProbabilities(text);
|
||||||
|
}
|
||||||
|
|
||||||
private static LanguageDetectorBuilder languageDetector;
|
private static LanguageDetectorBuilder languageDetector;
|
||||||
|
|
||||||
private static LanguageDetector createLanguageDetector() throws IOException {
|
private static LanguageDetector createLanguageDetector() throws IOException {
|
||||||
|
Loading…
Reference in New Issue
Block a user