2011-11-14 06:43:22 -05:00
|
|
|
|
2011-12-26 13:10:53 -05:00
|
|
|
package net.sourceforge.filebot.media;
|
2011-11-14 06:43:22 -05:00
|
|
|
|
|
|
|
|
2012-02-15 01:12:09 -05:00
|
|
|
import static java.util.Arrays.*;
|
2011-11-14 06:43:22 -05:00
|
|
|
import static java.util.ResourceBundle.*;
|
|
|
|
import static java.util.regex.Pattern.*;
|
2012-01-01 22:48:24 -05:00
|
|
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
2011-11-14 06:43:22 -05:00
|
|
|
import static net.sourceforge.tuned.StringUtilities.*;
|
|
|
|
|
|
|
|
import java.io.File;
|
2012-02-10 11:43:09 -05:00
|
|
|
import java.io.FileFilter;
|
2011-11-14 06:43:22 -05:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.nio.ByteBuffer;
|
|
|
|
import java.nio.charset.Charset;
|
2012-02-15 01:12:09 -05:00
|
|
|
import java.text.Collator;
|
|
|
|
import java.text.Normalizer;
|
|
|
|
import java.text.Normalizer.Form;
|
2011-11-14 06:43:22 -05:00
|
|
|
import java.util.ArrayList;
|
2012-02-15 01:12:09 -05:00
|
|
|
import java.util.Collection;
|
|
|
|
import java.util.Comparator;
|
|
|
|
import java.util.HashSet;
|
2011-11-14 06:43:22 -05:00
|
|
|
import java.util.List;
|
2011-12-30 10:34:02 -05:00
|
|
|
import java.util.Locale;
|
2012-01-02 11:59:37 -05:00
|
|
|
import java.util.Map;
|
2012-01-01 22:48:24 -05:00
|
|
|
import java.util.Scanner;
|
2012-02-15 01:12:09 -05:00
|
|
|
import java.util.Set;
|
2012-01-02 11:59:37 -05:00
|
|
|
import java.util.TreeMap;
|
2011-11-14 06:43:22 -05:00
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
2012-01-01 22:48:24 -05:00
|
|
|
import java.util.zip.GZIPInputStream;
|
2011-11-14 06:43:22 -05:00
|
|
|
|
|
|
|
import net.sourceforge.filebot.web.CachedResource;
|
2012-01-01 22:48:24 -05:00
|
|
|
import net.sourceforge.filebot.web.Movie;
|
2012-02-11 09:03:54 -05:00
|
|
|
import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
|
2012-01-01 22:48:24 -05:00
|
|
|
import net.sourceforge.tuned.ByteBufferInputStream;
|
2011-11-14 06:43:22 -05:00
|
|
|
|
|
|
|
|
|
|
|
public class ReleaseInfo {
|
|
|
|
|
|
|
|
public String getVideoSource(File file) {
|
|
|
|
// check parent and itself for group names
|
2012-01-01 22:48:24 -05:00
|
|
|
return matchLast(getVideoSourcePattern(), getBundle(getClass().getName()).getString("pattern.video.source").split("[|]"), file.getParent(), file.getName());
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2011-11-14 06:43:22 -05:00
|
|
|
public String getReleaseGroup(File file) throws IOException {
|
|
|
|
// check parent and itself for group names
|
2012-01-01 22:48:24 -05:00
|
|
|
return matchLast(getReleaseGroupPattern(false), releaseGroupResource.get(), file.getParent(), file.getName());
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2012-01-02 11:59:37 -05:00
|
|
|
public Locale getLanguageSuffix(String name) {
|
|
|
|
// match locale identifier and lookup Locale object
|
2012-02-15 01:12:09 -05:00
|
|
|
Map<String, Locale> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault());
|
|
|
|
String lang = matchLast(getLanguageSuffixPattern(languages.keySet()), null, name);
|
2012-01-02 11:59:37 -05:00
|
|
|
if (lang == null)
|
|
|
|
return null;
|
|
|
|
|
2012-02-15 01:12:09 -05:00
|
|
|
return languages.get(lang);
|
2012-01-02 11:59:37 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
protected String matchLast(Pattern pattern, String[] standardValues, CharSequence... sequence) {
|
2011-11-14 06:43:22 -05:00
|
|
|
String lastMatch = null;
|
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
// match last occurrence
|
2011-11-14 06:43:22 -05:00
|
|
|
for (CharSequence name : sequence) {
|
|
|
|
if (name == null)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
Matcher matcher = pattern.matcher(name);
|
|
|
|
while (matcher.find()) {
|
|
|
|
lastMatch = matcher.group();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
// prefer standard value over matched value
|
2012-01-02 11:59:37 -05:00
|
|
|
if (lastMatch != null && standardValues != null) {
|
2012-01-01 22:48:24 -05:00
|
|
|
for (String standard : standardValues) {
|
|
|
|
if (standard.equalsIgnoreCase(lastMatch)) {
|
|
|
|
return standard;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-14 06:43:22 -05:00
|
|
|
return lastMatch;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
public List<String> cleanRelease(Iterable<String> items, boolean strict) throws IOException {
|
2012-02-15 01:12:09 -05:00
|
|
|
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
|
|
|
return clean(items, getReleaseGroupPattern(strict), getLanguageSuffixPattern(languages), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern(), getLanguageOptionPattern(languages));
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
public String cleanRelease(String item, boolean strict) throws IOException {
|
2012-02-15 01:12:09 -05:00
|
|
|
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
|
|
|
return clean(item, getReleaseGroupPattern(strict), getLanguageSuffixPattern(languages), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern(), getLanguageOptionPattern(languages));
|
2011-11-26 04:50:31 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2011-11-14 06:43:22 -05:00
|
|
|
public List<String> clean(Iterable<String> items, Pattern... blacklisted) {
|
2011-11-26 04:50:31 -05:00
|
|
|
List<String> cleanedItems = new ArrayList<String>();
|
|
|
|
for (String it : items) {
|
2012-01-01 22:48:24 -05:00
|
|
|
String cleanedItem = clean(it, blacklisted);
|
|
|
|
if (cleanedItem.length() > 0) {
|
|
|
|
cleanedItems.add(cleanedItem);
|
|
|
|
}
|
2011-11-26 04:50:31 -05:00
|
|
|
}
|
2011-11-14 06:43:22 -05:00
|
|
|
|
2011-11-26 04:50:31 -05:00
|
|
|
return cleanedItems;
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2011-11-26 04:50:31 -05:00
|
|
|
public String clean(String item, Pattern... blacklisted) {
|
|
|
|
for (Pattern it : blacklisted) {
|
|
|
|
item = it.matcher(item).replaceAll("");
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
return normalizePunctuation(item);
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2012-02-15 01:12:09 -05:00
|
|
|
public Pattern getLanguageOptionPattern(Collection<String> languages) {
|
|
|
|
// [en]
|
|
|
|
return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
2012-01-02 11:59:37 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-02-15 01:12:09 -05:00
|
|
|
public Pattern getLanguageSuffixPattern(Collection<String> languages) {
|
|
|
|
// .en.srt
|
|
|
|
return compile("(?<=\\p{Punct}|\\s)(" + join(quoteAll(languages), "|") + ")(?=$)", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
2011-12-30 10:34:02 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public Pattern getResolutionPattern() {
|
|
|
|
// match screen resolutions 640x480, 1280x720, etc
|
|
|
|
return compile("(?<!\\p{Alnum})(\\d{4}|[6-9]\\d{2})x(\\d{4}|[4-9]\\d{2})(?!\\p{Alnum})");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-12-30 16:42:25 -05:00
|
|
|
public Pattern getVideoFormatPattern() {
|
2011-11-14 06:43:22 -05:00
|
|
|
// pattern matching any video source name
|
2011-12-30 16:42:25 -05:00
|
|
|
String pattern = getBundle(getClass().getName()).getString("pattern.video.format");
|
2011-11-14 06:43:22 -05:00
|
|
|
return compile("(?<!\\p{Alnum})(" + pattern + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2011-11-14 06:43:22 -05:00
|
|
|
public Pattern getVideoSourcePattern() {
|
|
|
|
// pattern matching any video source name
|
|
|
|
String pattern = getBundle(getClass().getName()).getString("pattern.video.source");
|
|
|
|
return compile("(?<!\\p{Alnum})(" + pattern + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
|
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
public synchronized Pattern getReleaseGroupPattern(boolean strict) throws IOException {
|
2011-11-14 06:43:22 -05:00
|
|
|
// pattern matching any release group name enclosed in separators
|
2012-01-01 22:48:24 -05:00
|
|
|
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|
|
|
|
|
2011-12-03 05:50:45 -05:00
|
|
|
|
2012-02-15 01:12:09 -05:00
|
|
|
public synchronized Pattern getBlacklistPattern() throws IOException {
|
2011-12-30 16:42:25 -05:00
|
|
|
// pattern matching any release group name enclosed in separators
|
2012-02-15 01:12:09 -05:00
|
|
|
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
|
2012-01-01 22:48:24 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public synchronized Movie[] getMovieList() throws IOException {
|
|
|
|
return movieListResource.get();
|
2011-12-30 16:42:25 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-02-11 09:03:54 -05:00
|
|
|
public synchronized TheTVDBSearchResult[] getSeriesList() throws IOException {
|
|
|
|
return seriesListResource.get();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-02-10 11:43:09 -05:00
|
|
|
public FileFilter getDiskFolderFilter() {
|
|
|
|
return new FolderEntryFilter(compile(getBundle(getClass().getName()).getString("pattern.diskfolder.entry")));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-26 10:41:58 -05:00
|
|
|
// fetch release group names online and try to update the data every other day
|
2012-01-01 22:48:24 -05:00
|
|
|
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
|
|
|
|
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
|
|
|
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
2012-02-11 09:03:54 -05:00
|
|
|
protected final CachedResource<TheTVDBSearchResult[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list"));
|
2011-12-30 16:42:25 -05:00
|
|
|
|
|
|
|
|
|
|
|
protected static class PatternResource extends CachedResource<String[]> {
|
|
|
|
|
|
|
|
public PatternResource(String resource) {
|
|
|
|
super(resource, String[].class, 24 * 60 * 60 * 1000); // 24h update interval
|
|
|
|
}
|
|
|
|
|
2011-11-14 06:43:22 -05:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public String[] process(ByteBuffer data) {
|
2011-12-30 16:42:25 -05:00
|
|
|
return compile("\\n").split(Charset.forName("UTF-8").decode(data));
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|
2011-12-30 16:42:25 -05:00
|
|
|
}
|
2011-11-14 06:43:22 -05:00
|
|
|
|
2012-01-01 22:48:24 -05:00
|
|
|
|
|
|
|
protected static class MovieResource extends CachedResource<Movie[]> {
|
|
|
|
|
|
|
|
public MovieResource(String resource) {
|
|
|
|
super(resource, Movie[].class, 24 * 60 * 60 * 1000); // 24h update interval
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Movie[] process(ByteBuffer data) throws IOException {
|
|
|
|
Scanner scanner = new Scanner(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n");
|
|
|
|
|
|
|
|
List<Movie> movies = new ArrayList<Movie>();
|
|
|
|
while (scanner.hasNext()) {
|
|
|
|
int imdbid = scanner.nextInt();
|
|
|
|
String name = scanner.next();
|
|
|
|
int year = scanner.nextInt();
|
|
|
|
movies.add(new Movie(name, year, imdbid));
|
|
|
|
}
|
|
|
|
|
|
|
|
return movies.toArray(new Movie[0]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-10 11:43:09 -05:00
|
|
|
|
2012-02-11 09:03:54 -05:00
|
|
|
protected static class SeriesResource extends CachedResource<TheTVDBSearchResult[]> {
|
|
|
|
|
|
|
|
public SeriesResource(String resource) {
|
|
|
|
super(resource, TheTVDBSearchResult[].class, 24 * 60 * 60 * 1000); // 24h update interval
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public TheTVDBSearchResult[] process(ByteBuffer data) throws IOException {
|
|
|
|
Scanner scanner = new Scanner(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n");
|
|
|
|
|
|
|
|
List<TheTVDBSearchResult> tvshows = new ArrayList<TheTVDBSearchResult>();
|
|
|
|
while (scanner.hasNext()) {
|
|
|
|
int sid = scanner.nextInt();
|
|
|
|
String name = scanner.next();
|
|
|
|
tvshows.add(new TheTVDBSearchResult(name, sid));
|
|
|
|
}
|
|
|
|
|
|
|
|
return tvshows.toArray(new TheTVDBSearchResult[0]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-02-10 11:43:09 -05:00
|
|
|
protected static class FolderEntryFilter implements FileFilter {
|
|
|
|
|
|
|
|
private final Pattern entryPattern;
|
|
|
|
|
|
|
|
|
|
|
|
public FolderEntryFilter(Pattern entryPattern) {
|
|
|
|
this.entryPattern = entryPattern;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean accept(File dir) {
|
|
|
|
if (dir.isDirectory()) {
|
|
|
|
for (String entry : dir.list()) {
|
|
|
|
if (entryPattern.matcher(entry).matches()) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-15 01:12:09 -05:00
|
|
|
|
|
|
|
private Collection<String> quoteAll(Collection<String> strings) {
|
|
|
|
List<String> patterns = new ArrayList<String>(strings.size());
|
|
|
|
for (String it : strings) {
|
|
|
|
patterns.add(Pattern.quote(it));
|
|
|
|
}
|
|
|
|
return patterns;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
private Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) {
|
|
|
|
// use maximum strength collator by default
|
|
|
|
Collator collator = Collator.getInstance(Locale.ROOT);
|
|
|
|
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
|
|
|
collator.setStrength(Collator.PRIMARY);
|
|
|
|
|
|
|
|
@SuppressWarnings("unchecked")
|
|
|
|
Comparator<String> order = (Comparator) collator;
|
|
|
|
|
|
|
|
Map<String, Locale> languageMap = new TreeMap<String, Locale>(order);
|
|
|
|
Set<Locale> displayLocales = new HashSet<Locale>(asList(supportedDisplayLocale));
|
|
|
|
|
|
|
|
for (String code : Locale.getISOLanguages()) {
|
|
|
|
Locale locale = new Locale(code);
|
|
|
|
languageMap.put(locale.getLanguage(), locale);
|
|
|
|
languageMap.put(locale.getISO3Language(), locale);
|
|
|
|
|
|
|
|
// map display language names for given locales
|
|
|
|
for (Locale language : displayLocales) {
|
|
|
|
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
|
|
|
|
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
|
|
|
|
languageMap.put(languageName, locale);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// remove illegal tokens
|
|
|
|
languageMap.remove("");
|
|
|
|
return languageMap;
|
|
|
|
}
|
|
|
|
|
2011-11-14 06:43:22 -05:00
|
|
|
}
|