diff --git a/source/net/filebot/cli/CmdlineOperations.java b/source/net/filebot/cli/CmdlineOperations.java index 2583850e..384fecf0 100644 --- a/source/net/filebot/cli/CmdlineOperations.java +++ b/source/net/filebot/cli/CmdlineOperations.java @@ -778,7 +778,7 @@ public class CmdlineOperations implements CmdlineInterface { private final String languageCode = Language.getStandardLanguageCode(getLanguage(languageName).getName()); public boolean matchesLanguageCode(File f) { - Locale languageSuffix = MediaDetection.releaseInfo.getLanguageSuffix(FileUtilities.getName(f)); + Locale languageSuffix = MediaDetection.releaseInfo.getLanguageTag(FileUtilities.getName(f)); Language language = Language.getLanguage(languageSuffix); if (language != null) { return language.getISO3().equalsIgnoreCase(languageCode); diff --git a/source/net/filebot/format/MediaBindingBean.java b/source/net/filebot/format/MediaBindingBean.java index 1f0133f6..a72b7faf 100644 --- a/source/net/filebot/format/MediaBindingBean.java +++ b/source/net/filebot/format/MediaBindingBean.java @@ -518,9 +518,28 @@ public class MediaBindingBean { return releaseInfo.getReleaseGroup(filenames); } + @Define("sub") + public String getSubtitleTags() throws Exception { + if (!SUBTITLE_FILES.accept(getMediaFile())) { + return null; + } + + Language language = getLanguageTag(); + if (language != null) { + String tag = '.' + language.getISO3B(); + String category = releaseInfo.getSubtitleCategoryTag(FileUtilities.getName(getMediaFile()), getOriginalFileName(getMediaFile())); + if (category != null) { + return tag + '.' + category; + } + return tag; + } + + return null; + } + @Define("lang") - public Language getSubtitleLanguage() throws Exception { - Locale languageSuffix = releaseInfo.getLanguageSuffix(FileUtilities.getName(getMediaFile())); + public Language getLanguageTag() throws Exception { + Locale languageSuffix = releaseInfo.getLanguageTag(FileUtilities.getName(getMediaFile()), getOriginalFileName(getMediaFile())); if (languageSuffix != null) { return Language.getLanguage(languageSuffix); } diff --git a/source/net/filebot/media/MediaDetection.java b/source/net/filebot/media/MediaDetection.java index 7961d0c2..67f67f75 100644 --- a/source/net/filebot/media/MediaDetection.java +++ b/source/net/filebot/media/MediaDetection.java @@ -111,7 +111,7 @@ public class MediaDetection { } public static Locale guessLanguageFromSuffix(File file) { - return releaseInfo.getLanguageSuffix(getName(file)); + return releaseInfo.getLanguageTag(getName(file)); } private static final SeasonEpisodeMatcher seasonEpisodeMatcherStrict = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true); @@ -1089,7 +1089,7 @@ public class MediaDetection { // allow extended extensions for subtitles files, for example name.eng.srt => map by en.srt if (key != null && SUBTITLE_FILES.accept(file)) { - Locale locale = releaseInfo.getLanguageSuffix(getName(file)); + Locale locale = releaseInfo.getLanguageTag(getName(file)); if (locale != null) { key = locale.getLanguage() + '.' + key; } diff --git a/source/net/filebot/media/ReleaseInfo.java b/source/net/filebot/media/ReleaseInfo.java index d69dd9d8..a3a5ab42 100644 --- a/source/net/filebot/media/ReleaseInfo.java +++ b/source/net/filebot/media/ReleaseInfo.java @@ -110,22 +110,25 @@ public class ReleaseInfo { return matchLast(getReleaseGroupPattern(false), groups, strings); } - private Map languages; - private Pattern languageSuffix; + private Pattern languageTag; - public Locale getLanguageSuffix(String name) { + public Locale getLanguageTag(CharSequence... name) { // match locale identifier and lookup Locale object - if (languages == null || languageSuffix == null) { - languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()); - languageSuffix = getLanguageSuffixPattern(languages.keySet(), false); + if (languageTag == null) { + languageTag = getSubtitleLanguageTagPattern(getDefaultLanguageMap().keySet()); } + String lang = matchLast(languageTag, null, name); + return lang == null ? null : getDefaultLanguageMap().get(lang); + } - String lang = matchLast(languageSuffix, null, name); - if (lang == null) { - return null; + private Pattern categoryTag; + + public String getSubtitleCategoryTag(CharSequence... name) { + // match locale identifier and lookup Locale object + if (categoryTag == null) { + categoryTag = getSubtitleCategoryTagPattern(getDefaultLanguageMap().keySet()); } - - return languages.get(lang); + return matchLast(categoryTag, getSubtitleCategoryTags(), name); } protected String matchLast(Pattern pattern, String[] paragon, CharSequence... sequence) { @@ -161,11 +164,11 @@ public class ReleaseInfo { // initialize cached patterns if (stopwords[b] == null || blacklist[b] == null) { - Set languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet(); + Set languages = getDefaultLanguageMap().keySet(); Pattern clutterBracket = getClutterBracketPattern(strict); Pattern releaseGroup = getReleaseGroupPattern(strict); Pattern releaseGroupTrim = getReleaseGroupTrimPattern(); - Pattern languageSuffix = getLanguageSuffixPattern(languages, strict); + Pattern languageSuffix = getSubtitleLanguageTagPattern(languages); Pattern languageTag = getLanguageTagPattern(languages); Pattern videoSource = getVideoSourcePattern(); Pattern videoTags = getVideoTagPattern(); @@ -261,9 +264,14 @@ public class ReleaseInfo { return compile("(?<=[-\\[{(])" + or(quoteAll(languages)) + "(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS); } - public Pattern getLanguageSuffixPattern(Collection languages, boolean strict) { + public Pattern getSubtitleCategoryTagPattern(Collection languages) { // e.g. ".en.srt" or ".en.forced.srt" - return compile("(?<=[._-])" + or(quoteAll(languages)) + "(?=([._-](" + getProperty("pattern.subtitle.tags") + "))?$)", strict ? 0 : CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS); + return compile("(?<=[._-](" + or(quoteAll(languages)) + ")[._-])" + or(getSubtitleCategoryTags()) + "$", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS); + } + + public Pattern getSubtitleLanguageTagPattern(Collection languages) { + // e.g. ".en.srt" or ".en.forced.srt" + return compile("(?<=[._-])" + or(quoteAll(languages)) + "(?=([._-]" + or(getSubtitleCategoryTags()) + ")?$)", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS); } public Pattern getResolutionPattern() { @@ -380,6 +388,10 @@ public class ReleaseInfo { return roots; } + public String[] getSubtitleCategoryTags() { + return getProperty("pattern.subtitle.tags").split("\\|"); + } + protected final Resource> seriesMappings = resource("url.series-mappings", Cache.ONE_WEEK, Function.identity(), String[]::new).transform(lines -> { Map map = new LinkedHashMap(lines.length); stream(lines).map(s -> s.split("\t", 2)).filter(v -> v.length == 2).forEach(v -> { @@ -517,7 +529,19 @@ public class ReleaseInfo { return values.stream().map((s) -> Pattern.quote(s)).toArray(String[]::new); } - public Map getLanguageMap(Locale... supportedDisplayLocale) { + private Map defaultLanguageMap; + + public Map getDefaultLanguageMap() { + if (defaultLanguageMap == null) { + defaultLanguageMap = getLanguageMap(Locale.ENGLISH, Locale.getDefault()); + } + return defaultLanguageMap; + } + + public Map getLanguageMap(Locale... displayLanguages) { + // unique + displayLanguages = stream(displayLanguages).distinct().toArray(Locale[]::new); + // use maximum strength collator by default Collator collator = Collator.getInstance(Locale.ENGLISH); collator.setDecomposition(Collator.FULL_DECOMPOSITION); @@ -533,7 +557,7 @@ public class ReleaseInfo { languageMap.put(locale.getISO3Language(), iso3locale); // map display language names for given locales - for (Locale language : new HashSet(asList(supportedDisplayLocale))) { + for (Locale language : displayLanguages) { // make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD); languageMap.put(languageName.toLowerCase(), iso3locale); diff --git a/source/net/filebot/media/ReleaseInfo.properties b/source/net/filebot/media/ReleaseInfo.properties index b41b508a..bb8658e8 100644 --- a/source/net/filebot/media/ReleaseInfo.properties +++ b/source/net/filebot/media/ReleaseInfo.properties @@ -8,7 +8,7 @@ pattern.video.tags: Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|(Director. pattern.video.s3d: ((H|HALF|F|FULL)[^\\p{Alnum}]{0,2})?(SBS|TAB|OU) # patterns for all subtitle tags -pattern.subtitle.tags: forced|HI|SDH|Director.?s.Commentary +pattern.subtitle.tags: forced|HI|SDH # additional release info patterns pattern.video.format: DivX|Xvid|AVC|(x|h)[.]?(264|265)|HEVC|3ivx|PGS|MP[E]?G[45]?|MP[34]|(FLAC|AAC|AC3|DD)(.?[2457][.]?[01])?|[26]ch|(Multi.)?DTS(.HD)?(.MA)?|TrueHD|Atmos|[M0]?(720|1080)[pi]|(?<=[-])(720|1080|2D|3D)|10.?bit|(24|30|60)FPS|Hi10[P]?|[a-z]{2,3}.(2[.]0|5[.]1)|(19|20)[0-9]+(.)S[0-9]+(?!(.)?E[0-9]+)|(?<=\\d+)v[0-4]