filebot/source/net/filebot/media/ReleaseInfo.java

711 lines
24 KiB
Java
Raw Normal View History

2014-04-19 02:30:29 -04:00
package net.filebot.media;
2013-09-11 13:22:00 -04:00
import static java.lang.Integer.*;
import static java.nio.charset.StandardCharsets.*;
2013-09-11 13:22:00 -04:00
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static java.util.ResourceBundle.*;
import static java.util.regex.Pattern.*;
2014-04-19 02:30:29 -04:00
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.text.Collator;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.filebot.Cache;
import net.filebot.CacheType;
import net.filebot.Resource;
2014-04-19 02:30:29 -04:00
import net.filebot.util.ByteBufferInputStream;
import net.filebot.util.FileUtilities.RegexFileFilter;
import net.filebot.web.AnidbSearchResult;
import net.filebot.web.CachedResource;
import net.filebot.web.Movie;
2015-05-11 07:42:59 -04:00
import net.filebot.web.SubtitleSearchResult;
2014-04-19 02:30:29 -04:00
import net.filebot.web.TheTVDBSearchResult;
import org.tukaani.xz.XZInputStream;
public class ReleaseInfo {
private String[] videoSources;
private Pattern videoSourcePattern;
2014-06-29 07:04:04 -04:00
public String getVideoSource(String... input) {
if (videoSources == null || videoSourcePattern == null) {
videoSources = PIPE.split(getProperty("pattern.video.source"));
videoSourcePattern = getVideoSourcePattern();
}
// check parent and itself for group names
return matchLast(videoSourcePattern, videoSources, input);
2014-06-29 07:04:04 -04:00
}
private Pattern videoTagPattern;
2014-06-29 07:04:04 -04:00
public List<String> getVideoTags(String... input) {
if (videoTagPattern == null) {
videoTagPattern = getVideoTagPattern();
}
2014-06-29 07:04:04 -04:00
List<String> tags = new ArrayList<String>();
for (String s : input) {
if (s == null)
continue;
Matcher m = videoTagPattern.matcher(s);
2014-06-29 07:04:04 -04:00
while (m.find()) {
tags.add(m.group());
}
}
return tags;
}
/**
 * Finds the first stereoscopic 3D flag in the given strings.
 *
 * @param input strings to search, in order; null elements are skipped (same as getVideoTags and matchLast)
 * @return the first matched text, or null if none of the strings contains a match
 */
public String getStereoscopic3D(String... input) {
    Pattern pattern = getStereoscopic3DPattern();
    for (String s : input) {
        // callers pass optional names (e.g. file and folder), so tolerate missing values
        if (s == null) {
            continue;
        }
        Matcher m = pattern.matcher(s);
        if (m.find()) {
            return m.group();
        }
    }
    return null;
}
/**
 * Finds the last release group name that occurs in the given strings.
 * A case-sensitive pattern is tried first; the case-insensitive pattern is only used if that finds nothing.
 *
 * @param strings strings to search, in order; null elements are skipped by matchLast
 * @return the spelling from the release group list when it equals the match ignoring case,
 *         otherwise the matched text, or null if nothing matched
 * @throws Exception propagated from loading the release group list or building the pattern
 */
public String getReleaseGroup(String... strings) throws Exception {
// check file and folder for release group names
String[] groups = releaseGroup.get();
// try case-sensitive match
String match = matchLast(getReleaseGroupPattern(true), groups, strings);
// try case-insensitive match as fallback
if (match == null) {
match = matchLast(getReleaseGroupPattern(false), groups, strings);
}
return match;
}
// lazily initialized: language token -> Locale lookup and the suffix pattern built from its keys
private Map<String, Locale> languages;
private Pattern languageSuffix;
/**
 * Looks up the language indicated by a language suffix in the given name.
 *
 * @param name the name to inspect
 * @return the Locale mapped to the last matching language token, or null if no suffix matched
 */
public Locale getLanguageSuffix(String name) {
// match locale identifier and lookup Locale object
if (languages == null || languageSuffix == null) {
languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault());
languageSuffix = getLanguageSuffixPattern(languages.keySet(), false);
}
// no standard values are passed, so the raw matched text is used as the lookup key
String lang = matchLast(languageSuffix, null, name);
if (lang == null)
return null;
return languages.get(lang);
}
/**
 * Returns the last occurrence of the pattern across all given sequences.
 *
 * @param pattern pattern to search for
 * @param standardValues preferred spellings, may be null; if one equals the match ignoring case it is returned instead
 * @param sequence texts to search, in order; null elements are skipped
 * @return the preferred spelling of the last match, the raw last match, or null if there was no match
 */
protected String matchLast(Pattern pattern, String[] standardValues, CharSequence... sequence) {
    String found = null;

    // keep overwriting so that the final occurrence in the final text wins
    for (CharSequence text : sequence) {
        if (text != null) {
            for (Matcher m = pattern.matcher(text); m.find();) {
                found = m.group();
            }
        }
    }

    if (found == null || standardValues == null) {
        return found;
    }

    // prefer the standard spelling over the matched spelling
    for (String canonical : standardValues) {
        if (canonical.equalsIgnoreCase(found)) {
            return canonical;
        }
    }
    return found;
}
// cached patterns
private final Map<Boolean, Pattern[]> stopwords = new HashMap<Boolean, Pattern[]>(2);
private final Map<Boolean, Pattern[]> blacklist = new HashMap<Boolean, Pattern[]>(2);
public List<String> cleanRelease(Collection<String> items, boolean strict) throws Exception {
Pattern[] stopwords;
Pattern[] blacklist;
// initialize cached patterns
synchronized (this.stopwords) {
stopwords = this.stopwords.get(strict);
blacklist = this.blacklist.get(strict);
if (stopwords == null || blacklist == null) {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
Pattern clutterBracket = getClutterBracketPattern(strict);
Pattern releaseGroup = getReleaseGroupPattern(strict);
Pattern languageSuffix = getLanguageSuffixPattern(languages, strict);
Pattern languageTag = getLanguageTagPattern(languages);
Pattern videoSource = getVideoSourcePattern();
2014-07-14 07:19:41 -04:00
Pattern videoTags = getVideoTagPattern();
Pattern videoFormat = getVideoFormatPattern(strict);
Pattern stereoscopic3d = getStereoscopic3DPattern();
Pattern resolution = getResolutionPattern();
Pattern queryBlacklist = getBlacklistPattern();
2016-01-29 11:06:28 -05:00
stopwords = new Pattern[] { languageTag, videoSource, videoTags, videoFormat, resolution, stereoscopic3d, languageSuffix };
blacklist = new Pattern[] { queryBlacklist, languageTag, clutterBracket, releaseGroup, videoSource, videoTags, videoFormat, resolution, stereoscopic3d, languageSuffix };
// cache compiled patterns for common usage
this.stopwords.put(strict, stopwords);
this.blacklist.put(strict, blacklist);
}
}
List<String> output = new ArrayList<String>(items.size());
for (String it : items) {
2012-07-13 07:15:14 -04:00
it = strict ? clean(it, stopwords) : substringBefore(it, stopwords);
it = normalizePunctuation(clean(it, blacklist));
// ignore empty values
if (it.length() > 0) {
output.add(it);
}
}
return output;
}
/**
 * Removes every match of each given pattern from the item, applying the patterns in order.
 *
 * @param item text to clean
 * @param blacklisted patterns whose matches are deleted
 * @return the text that remains after all removals
 */
public String clean(String item, Pattern... blacklisted) {
    String result = item;
    for (int i = 0; i < blacklisted.length; i++) {
        result = blacklisted[i].matcher(result).replaceAll("");
    }
    return result;
}
/**
 * Cuts the item off in front of the first match of each stopword pattern, one pattern after another.
 * A cut is only kept when the remaining prefix, after normalizePunctuation, is at least 3 characters long.
 *
 * @param item text to shorten
 * @param stopwords patterns applied in order, each against the (possibly already shortened) item
 * @return the shortened item, or the original item if no cut was accepted
 */
public String substringBefore(String item, Pattern... stopwords) {
for (Pattern it : stopwords) {
Matcher matcher = it.matcher(item);
if (matcher.find()) {
String substring = item.substring(0, matcher.start()); // use substring before the matched stopword
if (normalizePunctuation(substring).length() >= 3) {
item = substring; // make sure that the substring has enough data
}
}
}
return item;
}
// cached patterns
private Set<File> volumeRoots;
private Pattern structureRootFolderPattern;
public Set<File> getVolumeRoots() {
if (volumeRoots == null) {
Set<File> volumes = new HashSet<File>();
// user root folder
volumes.add(new File(System.getProperty("user.home")));
// Windows / Linux / Mac system roots
volumes.addAll(getFileSystemRoots());
if (File.separator.equals("/")) {
// Linux and Mac system root folders
for (File root : getFileSystemRoots()) {
volumes.addAll(getChildren(root, FOLDERS));
}
// user-specific media roots
2014-08-10 10:47:47 -04:00
for (File mediaRoot : getMediaRoots()) {
volumes.addAll(getChildren(mediaRoot, FOLDERS));
volumes.add(mediaRoot);
// add additional user roots if user.home is not set properly or listFiles doesn't work
String username = System.getProperty("user.name");
if (username != null && username.length() > 0) {
volumes.add(new File(mediaRoot, username));
}
}
}
2013-12-13 23:11:44 -05:00
volumeRoots = unmodifiableSet(volumes);
}
return volumeRoots;
}
public Pattern getStructureRootPattern() throws Exception {
if (structureRootFolderPattern == null) {
List<String> folders = new ArrayList<String>();
for (String it : queryBlacklist.get()) {
2014-01-11 04:04:49 -05:00
if (it.startsWith("^") && it.endsWith("$")) {
folders.add(it);
}
}
structureRootFolderPattern = compile(or(folders.toArray()), CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
return structureRootFolderPattern;
}
/**
 * Compiles a case-insensitive pattern for a language token that directly follows one of "-", "[", "{" or "("
 * and is directly followed by a punctuation character.
 *
 * @param languages language tokens; each one is quoted so it is matched literally
 * @return the compiled pattern
 */
public Pattern getLanguageTagPattern(Collection<String> languages) {
// [en]
return compile("(?<=[-\\[{(])" + or(quoteAll(languages)) + "(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
/**
 * Compiles a pattern for a language token that follows ".", "_" or "-" and sits at the end of the input,
 * optionally followed by one separator and one of the subtitle tags from the "pattern.subtitle.tags" property.
 *
 * @param languages language tokens; each one is quoted so it is matched literally
 * @param strict if true the pattern is compiled without flags (case-sensitive), otherwise case-insensitively
 * @return the compiled pattern
 */
public Pattern getLanguageSuffixPattern(Collection<String> languages, boolean strict) {
// e.g. ".en.srt" or ".en.forced.srt"
// note: "|" binds tighter than "?:", so the non-strict flags are CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS
return compile("(?<=[._-])" + or(quoteAll(languages)) + "(?=([._-](" + getProperty("pattern.subtitle.tags") + "))?$)", strict ? 0 : CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
/**
 * Compiles the pattern for screen resolutions written as WIDTHxHEIGHT, e.g. 640x480 or 1280x720.
 * Width is 600-9999, height is 400-9999, and the match must not touch other alphanumeric characters.
 *
 * @return a newly compiled pattern; group 1 is the width, group 2 the height
 */
public Pattern getResolutionPattern() {
    String resolution = "(?<!\\p{Alnum})(\\d{4}|[6-9]\\d{2})x(\\d{4}|[4-9]\\d{2})(?!\\p{Alnum})";
    return compile(resolution);
}
/**
 * Compiles the case-insensitive pattern for video format names from the "pattern.video.format" property.
 *
 * @param strict if true the match must not be adjacent to alphanumeric characters;
 *               otherwise the configured expression is compiled as is
 * @return a newly compiled pattern
 */
public Pattern getVideoFormatPattern(boolean strict) {
// pattern matching any video format name
String pattern = getProperty("pattern.video.format");
return strict ? compile("(?<!\\p{Alnum})(" + pattern + ")(?!\\p{Alnum})", CASE_INSENSITIVE) : compile(pattern, CASE_INSENSITIVE);
}
public Pattern getVideoSourcePattern() {
2014-06-29 07:04:04 -04:00
// pattern matching any video source name, like BluRay
String pattern = getProperty("pattern.video.source");
return compile("(?<!\\p{Alnum})(" + pattern + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
2014-06-29 07:04:04 -04:00
}
public Pattern getVideoTagPattern() {
// pattern matching any video tag, like Directors Cut
String pattern = getProperty("pattern.video.tags");
2014-06-29 07:04:04 -04:00
return compile("(?<!\\p{Alnum})(" + pattern + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
}
/**
 * Compiles the case-insensitive pattern for 3D flags from the "pattern.video.s3d" property;
 * matches must not be adjacent to alphanumeric characters.
 *
 * @return a newly compiled pattern
 */
public Pattern getStereoscopic3DPattern() {
// pattern matching any 3D flags like 3D.HSBS
String pattern = getProperty("pattern.video.s3d");
return compile("(?<!\\p{Alnum})(" + pattern + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
}
/**
 * Compiles a pattern for a non-nested [...], {...} or (...) group whose content contains a required
 * character with at least one other character on each side of it.
 *
 * @param strict if true the required character is whitespace or punctuation other than "[" and "]";
 *               otherwise it is an alphabetic character
 * @return a newly compiled pattern; groups 1, 2 and 3 hold the content of the square, curly and round variant
 */
public Pattern getClutterBracketPattern(boolean strict) {
// match patterns like [Action, Drama] or {ENG-XViD-MP3-DVDRiP} etc
String contentFilter = strict ? "[\\p{Space}\\p{Punct}&&[^\\[\\]]]" : "\\p{Alpha}";
return compile("(?:\\[([^\\[\\]]+?" + contentFilter + "[^\\[\\]]+?)\\])|(?:\\{([^\\{\\}]+?" + contentFilter + "[^\\{\\}]+?)\\})|(?:\\(([^\\(\\)]+?" + contentFilter + "[^\\(\\)]+?)\\))");
}
/**
 * Compiles the pattern for release group names taken from the release group list.
 * A name is rejected when it is adjacent to alphanumeric characters, or when it is followed by
 * one non-alphanumeric character and a year of the form 19xx / 20xx.
 *
 * @param strict if true the pattern is compiled without flags (case-sensitive), otherwise case-insensitively
 * @return a newly compiled pattern
 * @throws Exception propagated from loading the release group list
 */
public Pattern getReleaseGroupPattern(boolean strict) throws Exception {
// pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})" + or(releaseGroup.get()) + "(?!\\p{Alnum}|[^\\p{Alnum}](19|20)\\d{2})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
public Pattern getBlacklistPattern() throws Exception {
2011-12-30 16:42:25 -05:00
// pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})" + or(queryBlacklist.get()) + "(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
/**
 * Compiles the case-insensitive pattern for the entries of the exclude blacklist.
 * Unlike the other blacklist patterns, no alphanumeric-boundary lookarounds are added here.
 *
 * @return a newly compiled pattern
 * @throws Exception propagated from loading the exclude blacklist
 */
public Pattern getExcludePattern() throws Exception {
// pattern matching any entry of the exclude blacklist
return compile(or(excludeBlacklist.get()), CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
2014-02-19 15:28:00 -05:00
public Pattern getCustomRemovePattern(Collection<String> terms) throws IOException {
return compile("(?<!\\p{Alnum})" + or(quoteAll(terms)) + "(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
2014-02-19 15:28:00 -05:00
}
public Movie[] getMovieList() throws Exception {
return movieIndex.get();
2011-12-30 16:42:25 -05:00
}
/**
 * Returns the TheTVDB series index.
 *
 * @return the array supplied by the tvdbIndex resource
 * @throws Exception propagated from the resource
 */
public TheTVDBSearchResult[] getTheTVDBIndex() throws Exception {
return tvdbIndex.get();
}
/**
 * Returns the AniDB anime index.
 *
 * @return the array supplied by the anidbIndex resource
 * @throws Exception propagated from the resource
 */
public AnidbSearchResult[] getAnidbIndex() throws Exception {
return anidbIndex.get();
}
/**
 * Returns the OpenSubtitles index.
 *
 * @return the array supplied by the osdbIndex resource
 * @throws Exception propagated from the resource
 */
public SubtitleSearchResult[] getOpenSubtitlesIndex() throws Exception {
return osdbIndex.get();
}
2013-12-13 23:11:44 -05:00
private Map<Pattern, String> seriesDirectMappings;
public Map<Pattern, String> getSeriesDirectMappings() throws Exception {
2013-12-13 23:11:44 -05:00
if (seriesDirectMappings == null) {
Map<Pattern, String> mappings = new LinkedHashMap<Pattern, String>();
for (String line : seriesDirectMappingsResource.get()) {
String[] tsv = line.split("\t", 2);
if (tsv.length == 2) {
mappings.put(compile("(?<!\\p{Alnum})(" + tsv[0] + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS), tsv[1]);
2013-12-13 23:11:44 -05:00
}
}
2013-12-13 23:11:44 -05:00
seriesDirectMappings = unmodifiableMap(mappings);
}
2013-12-13 23:11:44 -05:00
return seriesDirectMappings;
}
// shared across all instances, created on first use
private static FolderEntryFilter diskFolderFilter;
/**
 * Returns a filter that accepts directories containing at least one child whose name
 * matches the "pattern.diskfolder.entry" property.
 *
 * @return the shared filter instance
 */
public FileFilter getDiskFolderFilter() {
if (diskFolderFilter == null) {
diskFolderFilter = new FolderEntryFilter(compile(getProperty("pattern.diskfolder.entry")));
}
return diskFolderFilter;
}
// shared across all instances, created on first use
private static RegexFileFilter diskFolderEntryFilter;
/**
 * Returns a RegexFileFilter built from the "pattern.diskfolder.entry" property.
 * NOTE(review): presumably this accepts the disk folder entries themselves (by name) — confirm against RegexFileFilter.
 *
 * @return the shared filter instance
 */
public FileFilter getDiskFolderEntryFilter() {
if (diskFolderEntryFilter == null) {
diskFolderEntryFilter = new RegexFileFilter(compile(getProperty("pattern.diskfolder.entry")));
}
return diskFolderEntryFilter;
}
// shared across all instances, created on first use
private static ClutterFileFilter clutterFileFilter;
/**
 * Returns a filter that accepts regular files which match the exclude pattern (by file or parent folder name)
 * and are smaller than the "number.clutter.maxfilesize" property.
 *
 * @return the shared filter instance
 * @throws Exception propagated from building the exclude pattern
 */
public FileFilter getClutterFileFilter() throws Exception {
if (clutterFileFilter == null) {
clutterFileFilter = new ClutterFileFilter(getExcludePattern(), Long.parseLong(getProperty("number.clutter.maxfilesize"))); // only files smaller than the configured size limit may be considered clutter
}
return clutterFileFilter;
}
/**
 * Returns the media root folders listed in the "folder.media.roots" property, which is split at ":".
 *
 * @return a new mutable list with one File per configured path
 */
public List<File> getMediaRoots() {
    String[] paths = getProperty("folder.media.roots").split(":");
    List<File> roots = new ArrayList<File>(paths.length);
    for (int i = 0; i < paths.length; i++) {
        roots.add(new File(paths[i]));
    }
    return roots;
}
2011-11-26 10:41:58 -05:00
// data files that are fetched from the configured URLs; patternResource and tsvResource both
// request a cache expiration of Cache.ONE_WEEK
// line-based pattern lists (one entry per non-empty line)
protected final Resource<String[]> releaseGroup = patternResource("url.release-groups");
protected final Resource<String[]> queryBlacklist = patternResource("url.query-blacklist");
protected final Resource<String[]> excludeBlacklist = patternResource("url.exclude-blacklist");
protected final Resource<String[]> seriesDirectMappingsResource = patternResource("url.series-mappings");
// tab-separated indexes (one parsed record per line)
protected final Resource<Movie[]> movieIndex = tsvResource("url.movie-list", this::parseMovie, Movie[]::new);
protected final Resource<TheTVDBSearchResult[]> tvdbIndex = tsvResource("url.thetvdb-index", this::parseSeries, TheTVDBSearchResult[]::new);
protected final Resource<AnidbSearchResult[]> anidbIndex = tsvResource("url.anidb-index", this::parseAnime, AnidbSearchResult[]::new);
protected final Resource<SubtitleSearchResult[]> osdbIndex = tsvResource("url.osdb-index", this::parseSubtitle, SubtitleSearchResult[]::new);
/**
 * Parses one movie index row: imdb id, tmdb id, year, name, then any number of alias names.
 *
 * @param v the columns of one row
 * @return the movie; ids that are not positive are passed on as -1
 */
private Movie parseMovie(String[] v) {
    int imdbId = parseInt(v[0]);
    int tmdbId = parseInt(v[1]);
    int releaseYear = parseInt(v[2]);
    String title = v[3];
    String[] aliases = copyOfRange(v, 4, v.length);

    // non-positive ids are replaced by -1
    if (imdbId <= 0) {
        imdbId = -1;
    }
    if (tmdbId <= 0) {
        tmdbId = -1;
    }
    return new Movie(title, aliases, releaseYear, imdbId, tmdbId, null);
}
/**
 * Parses one TheTVDB index row: series id, name, then any number of alias names.
 *
 * @param v the columns of one row
 * @return the search result for that row
 */
private TheTVDBSearchResult parseSeries(String[] v) {
    int seriesId = parseInt(v[0]);
    String title = v[1];
    String[] aliases = copyOfRange(v, 2, v.length);

    return new TheTVDBSearchResult(title, aliases, seriesId);
}
/**
 * Parses one AniDB index row: anime id, primary title, then any number of alias names.
 *
 * @param v the columns of one row
 * @return the search result for that row
 */
private AnidbSearchResult parseAnime(String[] v) {
    int animeId = parseInt(v[0]);
    String title = v[1];
    String[] aliases = copyOfRange(v, 2, v.length);

    return new AnidbSearchResult(animeId, title, aliases);
}
/**
 * Parses one OpenSubtitles index row: kind, score, imdb id, year, name, then any number of alias names.
 *
 * @param v the columns of one row
 * @return the search result for that row; the tmdb id is passed as -1 and the locale as English
 */
private SubtitleSearchResult parseSubtitle(String[] v) {
    String kindName = v[0];
    int rating = parseInt(v[1]);
    int imdb = parseInt(v[2]);
    int releaseYear = parseInt(v[3]);
    String title = v[4];
    String[] aliases = copyOfRange(v, 5, v.length);

    return new SubtitleSearchResult(title, aliases, releaseYear, imdb, -1, Locale.ENGLISH, SubtitleSearchResult.Kind.forName(kindName), rating);
}
/**
 * Creates a resource that yields the non-empty lines of the data file configured under the given property name.
 *
 * @param name property name of the data file location
 * @return a resource with a cache expiration of Cache.ONE_WEEK
 */
protected Resource<String[]> patternResource(String name) {
return resource(name, Cache.ONE_WEEK, s -> {
// a null result makes resource(...) drop the line
return s.length() > 0 ? s : null;
}, String[]::new);
}
/**
 * Creates a resource that splits each line of the configured data file at tabs and parses the columns into a record.
 *
 * @param name property name of the data file location
 * @param parse converts the columns of one line into a record
 * @param generator creates the result array of the requested size
 * @return a resource with a cache expiration of Cache.ONE_WEEK
 */
protected <T> Resource<T[]> tsvResource(String name, Function<String[], T> parse, IntFunction<T[]> generator) {
return resource(name, Cache.ONE_WEEK, s -> {
String[] v = s.split("\t");
// split drops trailing empty strings, so a line made only of tabs yields no columns and is skipped
return v.length > 0 ? parse.apply(v) : null;
}, generator);
}
/**
 * Creates a resource that reads the data file configured under the given property name line by line.
 * The bytes are obtained through the persistent "data" cache, decompressed as XZ and decoded as UTF-8.
 *
 * @param name property name of the data file location; also used as the cache key
 * @param expirationTime expiration passed to the cache lookup
 * @param parse converts one line into an element; lines for which it returns null are dropped
 * @param generator creates the result array of the requested size
 * @return a resource that performs the lookup and parsing each time it is invoked
 */
protected <T> Resource<T[]> resource(String name, Duration expirationTime, Function<String, T> parse, IntFunction<T[]> generator) {
return () -> {
Cache cache = Cache.getCache("data", CacheType.Persistent);
// NOTE(review): presumably the URL is only fetched when the cached bytes are missing or expired — confirm against Cache
byte[] bytes = cache.bytes(name, n -> new URL(getProperty(n))).expire(expirationTime).get();
// all data file are xz compressed
try (BufferedReader text = new BufferedReader(new InputStreamReader(new XZInputStream(new ByteArrayInputStream(bytes)), UTF_8))) {
return text.lines().map(parse).filter(Objects::nonNull).toArray(generator);
}
};
}
/**
 * Looks up a configuration value. A Java system property with the given name takes precedence
 * over the value from the ReleaseInfo resource bundle.
 *
 * @param name property key
 * @return the system property if set, otherwise the bundle value
 * @throws java.util.MissingResourceException if no system property is set and the bundle or key is missing
 */
protected String getProperty(String name) {
    // override resource locations via Java System properties;
    // check the override first so it also works for keys that are absent from the bundle
    String override = System.getProperty(name);
    if (override != null) {
        return override;
    }
    return getBundle(ReleaseInfo.class.getName()).getString(name);
}
2011-12-30 16:42:25 -05:00
protected static class PatternResource extends CachedResource<String[]> {
2011-12-30 16:42:25 -05:00
public PatternResource(String resource) {
2013-12-15 11:01:26 -05:00
super(resource, String[].class, ONE_WEEK); // check for updates every week
2011-12-30 16:42:25 -05:00
}
@Override
public String[] process(ByteBuffer data) {
2011-12-30 16:42:25 -05:00
return compile("\\n").split(Charset.forName("UTF-8").decode(data));
}
2011-12-30 16:42:25 -05:00
}
protected static class MovieResource extends CachedResource<Movie[]> {
public MovieResource(String resource) {
super(resource, Movie[].class, ONE_MONTH); // check for updates every month
}
@Override
public Movie[] process(ByteBuffer data) throws IOException {
2013-09-07 11:48:24 -04:00
List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
List<Movie> movies = new ArrayList<Movie>(rows.size());
for (String[] row : rows) {
int imdbid = parseInt(row[0]);
int tmdbid = parseInt(row[1]);
int year = parseInt(row[2]);
String name = row[3];
String[] aliasNames = copyOfRange(row, 4, row.length);
2014-07-24 07:31:24 -04:00
movies.add(new Movie(name, aliasNames, year, imdbid > 0 ? imdbid : -1, tmdbid > 0 ? tmdbid : -1, null));
}
return movies.toArray(new Movie[0]);
}
}
protected static class TheTVDBIndexResource extends CachedResource<TheTVDBSearchResult[]> {
public TheTVDBIndexResource(String resource) {
2013-12-15 11:01:26 -05:00
super(resource, TheTVDBSearchResult[].class, ONE_WEEK); // check for updates every week
}
@Override
public TheTVDBSearchResult[] process(ByteBuffer data) throws IOException {
2013-09-07 11:48:24 -04:00
List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
List<TheTVDBSearchResult> tvshows = new ArrayList<TheTVDBSearchResult>(rows.size());
for (String[] row : rows) {
int id = parseInt(row[0]);
String name = row[1];
String[] aliasNames = copyOfRange(row, 2, row.length);
tvshows.add(new TheTVDBSearchResult(name, aliasNames, id));
}
return tvshows.toArray(new TheTVDBSearchResult[0]);
2012-02-11 09:03:54 -05:00
}
}
protected static class AnidbIndexResource extends CachedResource<AnidbSearchResult[]> {
public AnidbIndexResource(String resource) {
super(resource, AnidbSearchResult[].class, ONE_WEEK); // check for updates every week
}
@Override
public AnidbSearchResult[] process(ByteBuffer data) throws IOException {
2013-09-07 11:48:24 -04:00
List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
List<AnidbSearchResult> anime = new ArrayList<AnidbSearchResult>(rows.size());
for (String[] row : rows) {
int aid = parseInt(row[0]);
String primaryTitle = row[1];
String[] aliasNames = copyOfRange(row, 2, row.length);
anime.add(new AnidbSearchResult(aid, primaryTitle, aliasNames));
}
return anime.toArray(new AnidbSearchResult[0]);
}
}
2015-05-11 07:42:59 -04:00
protected static class OpenSubtitlesIndexResource extends CachedResource<SubtitleSearchResult[]> {
public OpenSubtitlesIndexResource(String resource) {
2015-05-11 07:42:59 -04:00
super(resource, SubtitleSearchResult[].class, ONE_MONTH); // check for updates every month
}
@Override
2015-05-11 07:42:59 -04:00
public SubtitleSearchResult[] process(ByteBuffer data) throws IOException {
List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
2015-05-11 07:42:59 -04:00
List<SubtitleSearchResult> result = new ArrayList<SubtitleSearchResult>(rows.size());
for (String[] row : rows) {
String kind = row[0];
int score = parseInt(row[1]);
int imdbId = parseInt(row[2]);
int year = parseInt(row[3]);
String name = row[4];
String[] aliasNames = copyOfRange(row, 5, row.length);
result.add(new SubtitleSearchResult(name, aliasNames, year, imdbId, -1, Locale.ENGLISH, SubtitleSearchResult.Kind.forName(kind), score));
}
2015-05-11 07:42:59 -04:00
return result.toArray(new SubtitleSearchResult[0]);
}
}
protected static class FolderEntryFilter implements FileFilter {
private final Pattern entryPattern;
public FolderEntryFilter(Pattern entryPattern) {
this.entryPattern = entryPattern;
}
@Override
public boolean accept(File dir) {
if (dir.isDirectory()) {
2014-10-21 06:26:42 -04:00
for (File f : getChildren(dir)) {
if (entryPattern.matcher(f.getName()).matches()) {
return true;
}
}
}
return false;
}
}
/**
 * Accepts a file or folder whose own name contains a match of the name pattern, and additionally
 * accepts a regular file whose parent folder name contains a match.
 */
public static class FileFolderNameFilter implements FileFilter {

    private final Pattern namePattern;

    public FileFolderNameFilter(Pattern namePattern) {
        this.namePattern = namePattern;
    }

    @Override
    public boolean accept(File file) {
        if (namePattern.matcher(file.getName()).find()) {
            return true;
        }
        if (!file.isFile()) {
            return false;
        }

        // getParentFile() is null for a path without a parent component (e.g. new File("movie.avi"))
        File parent = file.getParentFile();
        return parent != null && namePattern.matcher(parent.getName()).find();
    }
}
2013-03-28 05:04:35 -04:00
public static class ClutterFileFilter extends FileFolderNameFilter {
2013-03-28 05:04:35 -04:00
private long maxFileSize;
2013-03-28 05:04:35 -04:00
public ClutterFileFilter(Pattern namePattern, long maxFileSize) {
super(namePattern);
this.maxFileSize = maxFileSize;
}
2013-03-28 05:04:35 -04:00
@Override
public boolean accept(File file) {
return super.accept(file) && file.isFile() && file.length() < maxFileSize;
}
}
// Joins the terms into a single "(a|b|c)" alternation via joinSorted, ordered by reverseOrder().
// NOTE(review): "(" opens a capturing group, not a non-capturing one. Reverse order presumably places
// a longer term before any of its own prefixes so the longest alternative is tried first — confirm against joinSorted.
private String or(Object[] terms) {
return joinSorted(terms, "|", reverseOrder(), "(", ")"); // group that matches the longest occurrence
}
// Converts each value into a regex literal via Pattern.quote so that it is matched verbatim.
private String[] quoteAll(Collection<String> values) {
    return values.stream().map(Pattern::quote).toArray(String[]::new);
}
2014-12-03 03:45:33 -05:00
public Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) {
// use maximum strength collator by default
Collator collator = Collator.getInstance(Locale.ENGLISH);
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
collator.setStrength(Collator.PRIMARY);
Comparator<? super String> order = collator;
Map<String, Locale> languageMap = new TreeMap<String, Locale>(order);
for (String code : Locale.getISOLanguages()) {
Locale locale = new Locale(code); // force ISO3 language as default toString() value
Locale iso3locale = new Locale(locale.getISO3Language());
languageMap.put(locale.getLanguage(), iso3locale);
languageMap.put(locale.getISO3Language(), iso3locale);
// map display language names for given locales
for (Locale language : new HashSet<Locale>(asList(supportedDisplayLocale))) {
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
languageMap.put(languageName.toLowerCase(), iso3locale);
}
}
// unofficial language for pb/pob for Portuguese (Brazil)
Locale brazil = new Locale("pob");
2014-12-03 03:45:33 -05:00
languageMap.put("brazilian", brazil);
languageMap.put("pb", brazil);
languageMap.put("pob", brazil);
// missing ISO 639-2 (B/T) locales (see https://github.com/TakahikoKawasaki/nv-i18n/blob/master/src/main/java/com/neovisionaries/i18n/LanguageAlpha3Code.java)
languageMap.put("tib", new Locale("bod"));
languageMap.put("cze", new Locale("ces"));
languageMap.put("wel", new Locale("cym"));
languageMap.put("ger", new Locale("deu"));
languageMap.put("gre", new Locale("ell"));
languageMap.put("baq", new Locale("eus"));
languageMap.put("per", new Locale("fas"));
languageMap.put("fre", new Locale("fra"));
languageMap.put("arm", new Locale("hye"));
languageMap.put("ice", new Locale("isl"));
languageMap.put("geo", new Locale("kat"));
languageMap.put("mac", new Locale("mkd"));
languageMap.put("mao", new Locale("mri"));
languageMap.put("may", new Locale("msa"));
languageMap.put("bur", new Locale("mya"));
languageMap.put("dut", new Locale("nld"));
languageMap.put("rum", new Locale("ron"));
languageMap.put("slo", new Locale("slk"));
languageMap.put("alb", new Locale("sqi"));
languageMap.put("chi", new Locale("zho"));
2014-05-09 16:27:18 -04:00
// remove illegal tokens
languageMap.remove("");
languageMap.remove("II");
languageMap.remove("III");
2016-02-25 07:16:18 -05:00
languageMap.remove("hi"); // hi => typically used for hearing-impaired subtitles, NOT hindi language
2016-02-25 07:16:18 -05:00
return unmodifiableMap(languageMap);
}
}