2011-12-26 13:10:53 -05:00
package net.sourceforge.filebot.media ;
2011-11-14 06:43:22 -05:00
2013-09-11 13:22:00 -04:00
import static java.lang.Integer.* ;
import static java.util.Arrays.* ;
import static java.util.Collections.* ;
import static java.util.ResourceBundle.* ;
import static java.util.regex.Pattern.* ;
import static net.sourceforge.filebot.similarity.Normalization.* ;
import static net.sourceforge.tuned.FileUtilities.* ;
import static net.sourceforge.tuned.StringUtilities.* ;
2011-11-14 06:43:22 -05:00
import java.io.File ;
2012-02-10 11:43:09 -05:00
import java.io.FileFilter ;
2011-11-14 06:43:22 -05:00
import java.io.IOException ;
import java.nio.ByteBuffer ;
import java.nio.charset.Charset ;
2012-02-15 01:12:09 -05:00
import java.text.Collator ;
import java.text.Normalizer ;
import java.text.Normalizer.Form ;
2011-11-14 06:43:22 -05:00
import java.util.ArrayList ;
2012-02-15 01:12:09 -05:00
import java.util.Collection ;
import java.util.Comparator ;
2012-07-26 04:45:15 -04:00
import java.util.HashMap ;
2012-02-15 01:12:09 -05:00
import java.util.HashSet ;
2012-10-09 11:04:14 -04:00
import java.util.LinkedHashMap ;
2011-11-14 06:43:22 -05:00
import java.util.List ;
2011-12-30 10:34:02 -05:00
import java.util.Locale ;
2012-01-02 11:59:37 -05:00
import java.util.Map ;
2012-02-15 01:12:09 -05:00
import java.util.Set ;
2012-01-02 11:59:37 -05:00
import java.util.TreeMap ;
2011-11-14 06:43:22 -05:00
import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
2013-07-13 06:01:33 -04:00
import net.sourceforge.filebot.web.AnidbSearchResult ;
2011-11-14 06:43:22 -05:00
import net.sourceforge.filebot.web.CachedResource ;
2012-01-01 22:48:24 -05:00
import net.sourceforge.filebot.web.Movie ;
2013-07-13 06:01:33 -04:00
import net.sourceforge.filebot.web.TheTVDBSearchResult ;
2012-01-01 22:48:24 -05:00
import net.sourceforge.tuned.ByteBufferInputStream ;
2013-09-17 13:32:38 -04:00
import net.sourceforge.tuned.FileUtilities.RegexFileFilter ;
2011-11-14 06:43:22 -05:00
2013-08-10 03:56:11 -04:00
import org.tukaani.xz.XZInputStream ;
2011-11-14 06:43:22 -05:00
public class ReleaseInfo {
2013-08-10 03:56:11 -04:00
2012-10-24 07:57:36 -04:00
public String getVideoSource ( String . . . strings ) {
2011-11-14 06:43:22 -05:00
// check parent and itself for group names
2012-10-24 07:57:36 -04:00
return matchLast ( getVideoSourcePattern ( ) , getBundle ( getClass ( ) . getName ( ) ) . getString ( " pattern.video.source " ) . split ( " [|] " ) , strings ) ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2012-10-24 07:57:36 -04:00
public String getReleaseGroup ( String . . . strings ) throws IOException {
2012-06-27 22:36:32 -04:00
// check file and folder for release group names
String [ ] groups = releaseGroupResource . get ( ) ;
2013-08-10 03:56:11 -04:00
2012-06-27 22:36:32 -04:00
// try case-sensitive match
2012-10-24 07:57:36 -04:00
String match = matchLast ( getReleaseGroupPattern ( true ) , groups , strings ) ;
2013-08-10 03:56:11 -04:00
2012-06-27 22:36:32 -04:00
// try case-insensitive match as fallback
if ( match = = null ) {
2012-10-24 07:57:36 -04:00
match = matchLast ( getReleaseGroupPattern ( false ) , groups , strings ) ;
2012-06-27 22:36:32 -04:00
}
2013-08-10 03:56:11 -04:00
2012-06-27 22:36:32 -04:00
return match ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2012-01-02 11:59:37 -05:00
public Locale getLanguageSuffix ( String name ) {
// match locale identifier and lookup Locale object
2012-02-15 01:12:09 -05:00
Map < String , Locale > languages = getLanguageMap ( Locale . ENGLISH , Locale . getDefault ( ) ) ;
2013-08-10 03:56:11 -04:00
2012-11-22 11:45:40 -05:00
String lang = matchLast ( getLanguageSuffixPattern ( languages . keySet ( ) , false ) , null , name ) ;
2012-01-02 11:59:37 -05:00
if ( lang = = null )
return null ;
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
return languages . get ( lang ) ;
2012-01-02 11:59:37 -05:00
}
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
protected String matchLast ( Pattern pattern , String [ ] standardValues , CharSequence . . . sequence ) {
2011-11-14 06:43:22 -05:00
String lastMatch = null ;
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
// match last occurrence
2011-11-14 06:43:22 -05:00
for ( CharSequence name : sequence ) {
if ( name = = null )
continue ;
2013-08-10 03:56:11 -04:00
2011-11-14 06:43:22 -05:00
Matcher matcher = pattern . matcher ( name ) ;
while ( matcher . find ( ) ) {
lastMatch = matcher . group ( ) ;
}
}
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
// prefer standard value over matched value
2012-01-02 11:59:37 -05:00
if ( lastMatch ! = null & & standardValues ! = null ) {
2012-01-01 22:48:24 -05:00
for ( String standard : standardValues ) {
if ( standard . equalsIgnoreCase ( lastMatch ) ) {
return standard ;
}
}
}
2013-08-10 03:56:11 -04:00
2011-11-14 06:43:22 -05:00
return lastMatch ;
}
2013-08-10 03:56:11 -04:00
2012-07-24 16:01:48 -04:00
// cached patterns
2012-07-26 04:45:15 -04:00
private final Map < Boolean , Pattern [ ] > stopwords = new HashMap < Boolean , Pattern [ ] > ( 2 ) ;
private final Map < Boolean , Pattern [ ] > blacklist = new HashMap < Boolean , Pattern [ ] > ( 2 ) ;
2013-08-10 03:56:11 -04:00
2012-02-23 13:48:35 -05:00
public List < String > cleanRelease ( Collection < String > items , boolean strict ) throws IOException {
2012-07-24 16:01:48 -04:00
Pattern [ ] stopwords ;
Pattern [ ] blacklist ;
2013-08-10 03:56:11 -04:00
2012-07-24 16:01:48 -04:00
// initialize cached patterns
2012-07-26 04:45:15 -04:00
synchronized ( this . stopwords ) {
stopwords = this . stopwords . get ( strict ) ;
blacklist = this . blacklist . get ( strict ) ;
2013-08-10 03:56:11 -04:00
2012-07-24 16:01:48 -04:00
if ( stopwords = = null | | blacklist = = null ) {
Set < String > languages = getLanguageMap ( Locale . ENGLISH , Locale . getDefault ( ) ) . keySet ( ) ;
Pattern clutterBracket = getClutterBracketPattern ( strict ) ;
Pattern releaseGroup = getReleaseGroupPattern ( strict ) ;
2012-11-22 11:45:40 -05:00
Pattern languageSuffix = getLanguageSuffixPattern ( languages , strict ) ;
2012-07-24 16:01:48 -04:00
Pattern languageTag = getLanguageTagPattern ( languages ) ;
Pattern videoSource = getVideoSourcePattern ( ) ;
2013-11-27 13:49:15 -05:00
Pattern videoFormat = getVideoFormatPattern ( strict ) ;
2012-07-24 16:01:48 -04:00
Pattern resolution = getResolutionPattern ( ) ;
Pattern queryBlacklist = getBlacklistPattern ( ) ;
2013-08-10 03:56:11 -04:00
2012-07-24 16:01:48 -04:00
stopwords = new Pattern [ ] { languageTag , videoSource , videoFormat , resolution , languageSuffix } ;
2012-11-15 08:48:28 -05:00
blacklist = new Pattern [ ] { queryBlacklist , languageTag , clutterBracket , releaseGroup , videoSource , videoFormat , resolution , languageSuffix } ;
2013-08-10 03:56:11 -04:00
2012-07-24 16:01:48 -04:00
// cache compiled patterns for common usage
2012-07-26 04:45:15 -04:00
this . stopwords . put ( strict , stopwords ) ;
this . blacklist . put ( strict , blacklist ) ;
2012-07-24 16:01:48 -04:00
}
}
2013-08-10 03:56:11 -04:00
2012-02-23 13:48:35 -05:00
List < String > output = new ArrayList < String > ( items . size ( ) ) ;
2011-11-26 04:50:31 -05:00
for ( String it : items ) {
2012-07-13 07:15:14 -04:00
it = strict ? clean ( it , stopwords ) : substringBefore ( it , stopwords ) ;
2013-04-01 06:17:20 -04:00
it = normalizePunctuation ( clean ( it , blacklist ) ) ;
2013-08-10 03:56:11 -04:00
2012-02-23 13:48:35 -05:00
// ignore empty values
if ( it . length ( ) > 0 ) {
output . add ( it ) ;
2012-01-01 22:48:24 -05:00
}
2011-11-26 04:50:31 -05:00
}
2013-08-10 03:56:11 -04:00
2012-02-23 13:48:35 -05:00
return output ;
2011-11-26 04:50:31 -05:00
}
2013-08-10 03:56:11 -04:00
2011-11-26 04:50:31 -05:00
public String clean ( String item , Pattern . . . blacklisted ) {
for ( Pattern it : blacklisted ) {
item = it . matcher ( item ) . replaceAll ( " " ) ;
2011-11-14 06:43:22 -05:00
}
2013-04-01 06:17:20 -04:00
return item ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2012-02-23 13:48:35 -05:00
public String substringBefore ( String item , Pattern . . . stopwords ) {
for ( Pattern it : stopwords ) {
Matcher matcher = it . matcher ( item ) ;
if ( matcher . find ( ) ) {
2012-07-04 06:20:52 -04:00
String substring = item . substring ( 0 , matcher . start ( ) ) ; // use substring before the matched stopword
if ( normalizePunctuation ( substring ) . length ( ) > = 3 ) {
item = substring ; // make sure that the substring has enough data
}
2012-02-23 13:48:35 -05:00
}
}
return item ;
}
2013-08-10 03:56:11 -04:00
2013-11-16 00:37:41 -05:00
// cached patterns
2013-11-16 07:10:23 -05:00
private Set < File > volumeRoots ;
2013-11-16 00:37:41 -05:00
private Pattern structureRootFolderPattern ;
2013-11-16 07:10:23 -05:00
public Set < File > getVolumeRoots ( ) {
if ( volumeRoots = = null ) {
Set < File > volumes = new HashSet < File > ( ) ;
// user root folder
volumes . add ( new File ( System . getProperty ( " user.home " ) ) ) ;
// Windows / Linux / Mac system roots
addAll ( volumes , File . listRoots ( ) ) ;
if ( File . separator . equals ( " / " ) ) {
2014-01-06 13:15:37 -05:00
// Linux and Mac system root folders
2013-11-16 07:10:23 -05:00
for ( File root : File . listRoots ( ) ) {
2014-01-06 13:15:37 -05:00
File [ ] f = root . listFiles ( FOLDERS ) ;
if ( f ! = null ) {
addAll ( volumes , f ) ;
}
2013-11-16 07:10:23 -05:00
}
2014-01-06 13:15:37 -05:00
// user-specific media roots
for ( File root : getMediaRoots ( ) ) {
2013-11-16 07:10:23 -05:00
if ( root . isDirectory ( ) ) {
2014-01-06 13:15:37 -05:00
File [ ] f = root . listFiles ( FOLDERS ) ;
if ( f ! = null ) {
addAll ( volumes , f ) ;
}
2013-11-16 07:10:23 -05:00
}
}
}
2014-01-06 13:15:37 -05:00
2013-12-13 23:11:44 -05:00
volumeRoots = unmodifiableSet ( volumes ) ;
2013-11-16 07:10:23 -05:00
}
return volumeRoots ;
}
2013-11-16 00:37:41 -05:00
public Pattern getStructureRootPattern ( ) throws IOException {
if ( structureRootFolderPattern = = null ) {
List < String > folders = new ArrayList < String > ( ) ;
for ( String it : queryBlacklistResource . get ( ) ) {
2014-01-11 04:04:49 -05:00
if ( it . startsWith ( " ^ " ) & & it . endsWith ( " $ " ) ) {
2013-11-16 00:37:41 -05:00
folders . add ( it ) ;
}
}
structureRootFolderPattern = compile ( join ( folders , " | " ) , CASE_INSENSITIVE | UNICODE_CASE ) ;
}
return structureRootFolderPattern ;
}
2012-02-23 13:48:35 -05:00
public Pattern getLanguageTagPattern ( Collection < String > languages ) {
2012-02-15 01:12:09 -05:00
// [en]
2012-07-26 04:45:15 -04:00
return compile ( " (?<=[- \\ [{(])( " + join ( quoteAll ( languages ) , " | " ) + " )(?= \\ p{Punct}) " , CASE_INSENSITIVE | UNICODE_CASE ) ;
2012-01-02 11:59:37 -05:00
}
2013-08-10 03:56:11 -04:00
2012-11-22 11:45:40 -05:00
public Pattern getLanguageSuffixPattern ( Collection < String > languages , boolean strict ) {
2012-02-15 01:12:09 -05:00
// .en.srt
2013-04-08 01:29:12 -04:00
return compile ( " (?<=[.])( " + join ( quoteAll ( languages ) , " | " ) + " )(?=[._ ]*$) " , ( strict ? 0 : CASE_INSENSITIVE ) | UNICODE_CASE ) ;
2011-12-30 10:34:02 -05:00
}
2013-08-10 03:56:11 -04:00
2011-12-30 10:34:02 -05:00
/**
 * Builds a pattern matching screen resolutions such as {@code 640x480} or {@code 1280x720}.
 * <p>
 * Width is either 4 digits or 3 digits starting with 6-9; height is either 4 digits or 3 digits starting
 * with 4-9. The resolution must not be directly preceded or followed by an alphanumeric character.
 *
 * @return the compiled pattern
 */
public Pattern getResolutionPattern() {
    String width = "(\\d{4}|[6-9]\\d{2})";
    String height = "(\\d{4}|[4-9]\\d{2})";
    return compile("(?<!\\p{Alnum})" + width + "x" + height + "(?!\\p{Alnum})");
}
2013-08-10 03:56:11 -04:00
2013-11-27 13:49:15 -05:00
public Pattern getVideoFormatPattern ( boolean strict ) {
2011-11-14 06:43:22 -05:00
// pattern matching any video source name
2011-12-30 16:42:25 -05:00
String pattern = getBundle ( getClass ( ) . getName ( ) ) . getString ( " pattern.video.format " ) ;
2013-11-27 13:49:15 -05:00
return strict ? compile ( " (?<! \\ p{Alnum})( " + pattern + " )(?! \\ p{Alnum}) " , CASE_INSENSITIVE ) : compile ( pattern , CASE_INSENSITIVE ) ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2011-11-14 06:43:22 -05:00
/**
 * Builds a case-insensitive pattern from the {@code pattern.video.source} entry of this class's resource bundle.
 * <p>
 * A match must not be directly preceded or followed by an alphanumeric character.
 *
 * @return the compiled pattern
 */
public Pattern getVideoSourcePattern() {
    String sources = getBundle(getClass().getName()).getString("pattern.video.source");
    String regex = "(?<!\\p{Alnum})(" + sources + ")(?!\\p{Alnum})";
    return compile(regex, CASE_INSENSITIVE);
}
2013-08-10 03:56:11 -04:00
2012-06-22 03:47:26 -04:00
/**
 * Builds a pattern matching bracketed clutter like {@code [Action, Drama]} or {@code {ENG-XViD-MP3-DVDRiP}}.
 * <p>
 * Square, curly and round brackets are supported; nested brackets of the same kind are not matched.
 * The bracket content must contain, surrounded by at least one other character on each side,
 * a whitespace or punctuation character (strict) or a letter (non-strict).
 *
 * @param strict selects the required content character as described above
 * @return the compiled pattern
 */
public Pattern getClutterBracketPattern(boolean strict) {
    String contentFilter = strict ? "[\\p{Space}\\p{Punct}&&[^\\[\\]]]" : "\\p{Alpha}";

    String square = "(?:\\[([^\\[\\]]+?" + contentFilter + "[^\\[\\]]+?)\\])";
    String curly = "(?:\\{([^\\{\\}]+?" + contentFilter + "[^\\{\\}]+?)\\})";
    String round = "(?:\\(([^\\(\\)]+?" + contentFilter + "[^\\(\\)]+?)\\))";

    return compile(square + "|" + curly + "|" + round);
}
2013-08-10 03:56:11 -04:00
2012-07-05 23:10:26 -04:00
public Pattern getReleaseGroupPattern ( boolean strict ) throws IOException {
2011-11-14 06:43:22 -05:00
// pattern matching any release group name enclosed in separators
2013-09-03 08:07:55 -04:00
return compile ( " (?<! \\ p{Alnum})( " + join ( releaseGroupResource . get ( ) , " | " ) + " )(?! \\ p{Alnum}|[^ \\ p{Alnum}] \\ d{4}) " , strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE ) ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2012-07-05 23:10:26 -04:00
public Pattern getBlacklistPattern ( ) throws IOException {
2011-12-30 16:42:25 -05:00
// pattern matching any release group name enclosed in separators
2012-07-26 04:45:15 -04:00
return compile ( " (?<! \\ p{Alnum})( " + join ( queryBlacklistResource . get ( ) , " | " ) + " )(?! \\ p{Alnum}) " , CASE_INSENSITIVE | UNICODE_CASE ) ;
}
2013-08-10 03:56:11 -04:00
2012-07-26 04:45:15 -04:00
public Pattern getExcludePattern ( ) throws IOException {
// pattern matching any release group name enclosed in separators
return compile ( join ( excludeBlacklistResource . get ( ) , " | " ) , CASE_INSENSITIVE | UNICODE_CASE ) ;
2012-01-01 22:48:24 -05:00
}
2013-08-10 03:56:11 -04:00
2014-02-19 15:28:00 -05:00
/**
 * Builds a case-insensitive pattern matching any of the given terms literally.
 * <p>
 * A match must not be directly preceded or followed by an alphanumeric character.
 *
 * @param terms the terms to match; each one is quoted, so regex metacharacters have no special meaning
 * @return the compiled pattern
 * @throws IOException declared for callers; nothing in this method's own body throws it
 */
public Pattern getCustomRemovePattern(Collection<String> terms) throws IOException {
    String alternatives = join(quoteAll(terms), "|");
    return compile("(?<!\\p{Alnum})(" + alternatives + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE);
}
2012-07-05 23:10:26 -04:00
public Movie [ ] getMovieList ( ) throws IOException {
2012-01-01 22:48:24 -05:00
return movieListResource . get ( ) ;
2011-12-30 16:42:25 -05:00
}
2013-08-10 03:56:11 -04:00
2012-10-14 07:57:25 -04:00
public TheTVDBSearchResult [ ] getTheTVDBIndex ( ) throws IOException {
2013-03-17 10:19:11 -04:00
return tvdbIndexResource . get ( ) ;
}
2013-08-10 03:56:11 -04:00
2013-03-17 10:19:11 -04:00
public AnidbSearchResult [ ] getAnidbIndex ( ) throws IOException {
return anidbIndexResource . get ( ) ;
2012-10-14 07:57:25 -04:00
}
2013-08-10 03:56:11 -04:00
2013-12-13 23:11:44 -05:00
private Map < Pattern , String > seriesDirectMappings ;
2012-10-09 11:04:14 -04:00
public Map < Pattern , String > getSeriesDirectMappings ( ) throws IOException {
2013-12-13 23:11:44 -05:00
if ( seriesDirectMappings = = null ) {
Map < Pattern , String > mappings = new LinkedHashMap < Pattern , String > ( ) ;
for ( String line : seriesDirectMappingsResource . get ( ) ) {
String [ ] tsv = line . split ( " \ t " , 2 ) ;
if ( tsv . length = = 2 ) {
mappings . put ( compile ( " (?<! \\ p{Alnum})( " + tsv [ 0 ] + " )(?! \\ p{Alnum}) " , CASE_INSENSITIVE | UNICODE_CASE ) , tsv [ 1 ] ) ;
}
2012-10-09 11:04:14 -04:00
}
2013-12-13 23:11:44 -05:00
seriesDirectMappings = unmodifiableMap ( mappings ) ;
2012-10-09 11:04:14 -04:00
}
2013-12-13 23:11:44 -05:00
return seriesDirectMappings ;
2012-10-09 11:04:14 -04:00
}
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
/**
 * Creates a filter that accepts folders containing at least one entry whose name fully matches the
 * {@code pattern.diskfolder.entry} regex of this class's resource bundle.
 *
 * @return a new folder filter
 */
public FileFilter getDiskFolderFilter() {
    Pattern entryPattern = compile(getBundle(getClass().getName()).getString("pattern.diskfolder.entry"));
    return new FolderEntryFilter(entryPattern);
}
2013-08-10 03:56:11 -04:00
2013-09-17 13:32:38 -04:00
/**
 * Creates a {@code RegexFileFilter} for the {@code pattern.diskfolder.entry} regex of this class's
 * resource bundle.
 *
 * @return a new file filter
 */
public FileFilter getDiskFolderEntryFilter() {
    Pattern entryPattern = compile(getBundle(getClass().getName()).getString("pattern.diskfolder.entry"));
    return new RegexFileFilter(entryPattern);
}
2012-07-26 04:45:15 -04:00
public FileFilter getClutterFileFilter ( ) throws IOException {
2013-10-29 01:55:30 -04:00
return new ClutterFileFilter ( getExcludePattern ( ) , Long . parseLong ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " number.clutter.maxfilesize " ) ) ) ; // only files smaller than 250 MB may be considered clutter
2012-06-15 06:45:35 -04:00
}
2013-08-10 03:56:11 -04:00
2014-01-06 13:15:37 -05:00
/**
 * Returns the media root folders listed in the {@code folder.media.roots} entry of this class's
 * resource bundle, which is a {@code :}-separated list of paths.
 * <p>
 * The paths are not checked for existence.
 *
 * @return a new list with one file per configured path
 */
public List<File> getMediaRoots() {
    String[] paths = getBundle(getClass().getName()).getString("folder.media.roots").split(":");

    List<File> roots = new ArrayList<File>(paths.length);
    for (String path : paths) {
        roots.add(new File(path));
    }
    return roots;
}
2011-11-26 10:41:58 -05:00
// fetch release group names online and try to update the data every other day
2012-01-01 22:48:24 -05:00
protected final CachedResource < String [ ] > releaseGroupResource = new PatternResource ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " url.release-groups " ) ) ;
protected final CachedResource < String [ ] > queryBlacklistResource = new PatternResource ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " url.query-blacklist " ) ) ;
2012-07-26 04:45:15 -04:00
protected final CachedResource < String [ ] > excludeBlacklistResource = new PatternResource ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " url.exclude-blacklist " ) ) ;
2012-01-01 22:48:24 -05:00
protected final CachedResource < Movie [ ] > movieListResource = new MovieResource ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " url.movie-list " ) ) ;
2012-10-09 11:04:14 -04:00
protected final CachedResource < String [ ] > seriesDirectMappingsResource = new PatternResource ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " url.series-mappings " ) ) ;
2013-03-17 10:19:11 -04:00
protected final CachedResource < TheTVDBSearchResult [ ] > tvdbIndexResource = new TheTVDBIndexResource ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " url.thetvdb-index " ) ) ;
protected final CachedResource < AnidbSearchResult [ ] > anidbIndexResource = new AnidbIndexResource ( getBundle ( getClass ( ) . getName ( ) ) . getString ( " url.anidb-index " ) ) ;
2013-08-10 03:56:11 -04:00
2011-12-30 16:42:25 -05:00
protected static class PatternResource extends CachedResource < String [ ] > {
2013-08-10 03:56:11 -04:00
2011-12-30 16:42:25 -05:00
public PatternResource ( String resource ) {
2013-12-15 11:01:26 -05:00
super ( resource , String [ ] . class , ONE_WEEK ) ; // check for updates every week
2011-12-30 16:42:25 -05:00
}
2013-08-10 03:56:11 -04:00
2011-11-14 06:43:22 -05:00
@Override
public String [ ] process ( ByteBuffer data ) {
2011-12-30 16:42:25 -05:00
return compile ( " \\ n " ) . split ( Charset . forName ( " UTF-8 " ) . decode ( data ) ) ;
2011-11-14 06:43:22 -05:00
}
2011-12-30 16:42:25 -05:00
}
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
protected static class MovieResource extends CachedResource < Movie [ ] > {
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
public MovieResource ( String resource ) {
2013-11-03 11:32:40 -05:00
super ( resource , Movie [ ] . class , ONE_MONTH ) ; // check for updates every month
2012-01-01 22:48:24 -05:00
}
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
@Override
public Movie [ ] process ( ByteBuffer data ) throws IOException {
2013-09-07 11:48:24 -04:00
List < String [ ] > rows = readCSV ( new XZInputStream ( new ByteBufferInputStream ( data ) ) , " UTF-8 " , " \ t " ) ;
List < Movie > movies = new ArrayList < Movie > ( rows . size ( ) ) ;
for ( String [ ] row : rows ) {
int imdbid = parseInt ( row [ 0 ] ) ;
2013-11-20 05:07:25 -05:00
int tmdbid = parseInt ( row [ 1 ] ) ;
int year = parseInt ( row [ 2 ] ) ;
String name = row [ 3 ] ;
String [ ] aliasNames = copyOfRange ( row , 4 , row . length ) ;
movies . add ( new Movie ( name , aliasNames , year , imdbid > 0 ? imdbid : - 1 , tmdbid > 0 ? tmdbid : - 1 ) ) ;
2012-01-01 22:48:24 -05:00
}
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
return movies . toArray ( new Movie [ 0 ] ) ;
}
}
2013-08-10 03:56:11 -04:00
2012-10-14 07:57:25 -04:00
protected static class TheTVDBIndexResource extends CachedResource < TheTVDBSearchResult [ ] > {
2013-08-10 03:56:11 -04:00
2012-10-14 07:57:25 -04:00
public TheTVDBIndexResource ( String resource ) {
2013-12-15 11:01:26 -05:00
super ( resource , TheTVDBSearchResult [ ] . class , ONE_WEEK ) ; // check for updates every week
2012-10-14 07:57:25 -04:00
}
2013-08-10 03:56:11 -04:00
2012-10-14 07:57:25 -04:00
@Override
public TheTVDBSearchResult [ ] process ( ByteBuffer data ) throws IOException {
2013-09-07 11:48:24 -04:00
List < String [ ] > rows = readCSV ( new XZInputStream ( new ByteBufferInputStream ( data ) ) , " UTF-8 " , " \ t " ) ;
List < TheTVDBSearchResult > tvshows = new ArrayList < TheTVDBSearchResult > ( rows . size ( ) ) ;
for ( String [ ] row : rows ) {
int id = parseInt ( row [ 0 ] ) ;
String name = row [ 1 ] ;
String [ ] aliasNames = copyOfRange ( row , 2 , row . length ) ;
tvshows . add ( new TheTVDBSearchResult ( name , aliasNames , id ) ) ;
2012-10-14 07:57:25 -04:00
}
2013-08-10 03:56:11 -04:00
2012-10-14 07:57:25 -04:00
return tvshows . toArray ( new TheTVDBSearchResult [ 0 ] ) ;
2012-02-11 09:03:54 -05:00
}
}
2013-08-10 03:56:11 -04:00
2013-03-17 10:19:11 -04:00
protected static class AnidbIndexResource extends CachedResource < AnidbSearchResult [ ] > {
2013-08-10 03:56:11 -04:00
2013-03-17 10:19:11 -04:00
public AnidbIndexResource ( String resource ) {
2013-12-15 11:01:26 -05:00
super ( resource , AnidbSearchResult [ ] . class , ONE_MONTH ) ; // check for updates every month
2013-03-17 10:19:11 -04:00
}
2013-08-10 03:56:11 -04:00
2013-03-17 10:19:11 -04:00
@Override
public AnidbSearchResult [ ] process ( ByteBuffer data ) throws IOException {
2013-09-07 11:48:24 -04:00
List < String [ ] > rows = readCSV ( new XZInputStream ( new ByteBufferInputStream ( data ) ) , " UTF-8 " , " \ t " ) ;
List < AnidbSearchResult > anime = new ArrayList < AnidbSearchResult > ( rows . size ( ) ) ;
for ( String [ ] row : rows ) {
int aid = parseInt ( row [ 0 ] ) ;
String primaryTitle = row [ 1 ] ;
String [ ] aliasNames = copyOfRange ( row , 2 , row . length ) ;
anime . add ( new AnidbSearchResult ( aid , primaryTitle , aliasNames ) ) ;
2013-03-17 10:19:11 -04:00
}
2013-08-10 03:56:11 -04:00
2013-03-17 10:19:11 -04:00
return anime . toArray ( new AnidbSearchResult [ 0 ] ) ;
}
}
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
protected static class FolderEntryFilter implements FileFilter {
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
private final Pattern entryPattern ;
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
public FolderEntryFilter ( Pattern entryPattern ) {
this . entryPattern = entryPattern ;
}
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
@Override
public boolean accept ( File dir ) {
if ( dir . isDirectory ( ) ) {
for ( String entry : dir . list ( ) ) {
if ( entryPattern . matcher ( entry ) . matches ( ) ) {
return true ;
}
}
}
return false ;
}
}
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
public static class FileFolderNameFilter implements FileFilter {
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
private final Pattern namePattern ;
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
public FileFolderNameFilter ( Pattern namePattern ) {
this . namePattern = namePattern ;
}
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
@Override
public boolean accept ( File file ) {
return ( namePattern . matcher ( file . getName ( ) ) . find ( ) | | ( file . isFile ( ) & & namePattern . matcher ( file . getParentFile ( ) . getName ( ) ) . find ( ) ) ) ;
}
}
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
public static class ClutterFileFilter extends FileFolderNameFilter {
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
private long maxFileSize ;
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
public ClutterFileFilter ( Pattern namePattern , long maxFileSize ) {
super ( namePattern ) ;
this . maxFileSize = maxFileSize ;
}
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
@Override
public boolean accept ( File file ) {
return super . accept ( file ) & & file . isFile ( ) & & file . length ( ) < maxFileSize ;
}
}
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
/**
 * Quotes each string so that it is matched literally when embedded in a regex.
 *
 * @param strings the strings to quote
 * @return a new list with the quoted strings in iteration order
 */
private Collection<String> quoteAll(Collection<String> strings) {
    List<String> quoted = new ArrayList<String>(strings.size());
    for (Iterator<String> i = strings.iterator(); i.hasNext();) {
        quoted.add(Pattern.quote(i.next()));
    }
    return quoted;
}
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
private Map < String , Locale > getLanguageMap ( Locale . . . supportedDisplayLocale ) {
// use maximum strength collator by default
Collator collator = Collator . getInstance ( Locale . ROOT ) ;
collator . setDecomposition ( Collator . FULL_DECOMPOSITION ) ;
collator . setStrength ( Collator . PRIMARY ) ;
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
@SuppressWarnings ( " unchecked " )
Comparator < String > order = ( Comparator ) collator ;
2013-12-13 21:22:31 -05:00
Map < String , Locale > languageMap = new TreeMap < String , Locale > ( order ) ;
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
for ( String code : Locale . getISOLanguages ( ) ) {
2013-12-13 21:22:31 -05:00
Locale locale = new Locale ( code ) ; // force ISO3 language as default toString() value
Locale iso3locale = new Locale ( locale . getISO3Language ( ) ) ;
languageMap . put ( locale . getLanguage ( ) , iso3locale ) ;
languageMap . put ( locale . getISO3Language ( ) , iso3locale ) ;
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
// map display language names for given locales
2012-07-24 16:01:48 -04:00
for ( Locale language : new HashSet < Locale > ( asList ( supportedDisplayLocale ) ) ) {
2012-02-15 01:12:09 -05:00
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
String languageName = Normalizer . normalize ( locale . getDisplayLanguage ( language ) , Form . NFKD ) ;
2013-12-13 21:22:31 -05:00
languageMap . put ( languageName . toLowerCase ( ) , iso3locale ) ;
2012-02-15 01:12:09 -05:00
}
}
2013-08-10 03:56:11 -04:00
2013-12-13 21:22:31 -05:00
// unofficial language for pb/pob for Portuguese (Brazil)
Locale brazil = new Locale ( " pob " ) ;
languageMap . put ( " pb " , brazil ) ;
languageMap . put ( " pob " , brazil ) ;
2012-02-15 01:12:09 -05:00
// remove illegal tokens
languageMap . remove ( " " ) ;
2012-06-22 03:47:26 -04:00
languageMap . remove ( " II " ) ;
languageMap . remove ( " III " ) ;
2013-01-27 01:04:32 -05:00
languageMap . remove ( " hi " ) ; // hi => hearing-impaired subtitles, NOT hindi language
2013-08-10 03:56:11 -04:00
2012-06-22 03:47:26 -04:00
Map < String , Locale > result = unmodifiableMap ( languageMap ) ;
return result ;
2012-02-15 01:12:09 -05:00
}
2011-11-14 06:43:22 -05:00
}