2014-04-19 02:30:29 -04:00
package net.filebot.media ;
2011-11-14 06:43:22 -05:00
2013-09-11 13:22:00 -04:00
import static java.lang.Integer.* ;
2016-03-08 07:59:24 -05:00
import static java.nio.charset.StandardCharsets.* ;
2013-09-11 13:22:00 -04:00
import static java.util.Arrays.* ;
import static java.util.Collections.* ;
import static java.util.ResourceBundle.* ;
import static java.util.regex.Pattern.* ;
2016-03-12 09:09:14 -05:00
import static java.util.stream.Collectors.* ;
2016-04-13 07:21:55 -04:00
import static net.filebot.Settings.* ;
2014-04-19 02:30:29 -04:00
import static net.filebot.similarity.Normalization.* ;
import static net.filebot.util.FileUtilities.* ;
2016-04-02 05:07:10 -04:00
import static net.filebot.util.RegularExpressions.* ;
2014-04-19 02:30:29 -04:00
import static net.filebot.util.StringUtilities.* ;
2011-11-14 06:43:22 -05:00
2016-03-08 07:59:24 -05:00
import java.io.BufferedReader ;
import java.io.ByteArrayInputStream ;
2011-11-14 06:43:22 -05:00
import java.io.File ;
2012-02-10 11:43:09 -05:00
import java.io.FileFilter ;
2011-11-14 06:43:22 -05:00
import java.io.IOException ;
2016-03-08 07:59:24 -05:00
import java.io.InputStreamReader ;
import java.net.URL ;
2012-02-15 01:12:09 -05:00
import java.text.Collator ;
import java.text.Normalizer ;
import java.text.Normalizer.Form ;
2016-03-08 07:59:24 -05:00
import java.time.Duration ;
2011-11-14 06:43:22 -05:00
import java.util.ArrayList ;
2012-02-15 01:12:09 -05:00
import java.util.Collection ;
import java.util.HashSet ;
2012-10-09 11:04:14 -04:00
import java.util.LinkedHashMap ;
2011-11-14 06:43:22 -05:00
import java.util.List ;
2011-12-30 10:34:02 -05:00
import java.util.Locale ;
2012-01-02 11:59:37 -05:00
import java.util.Map ;
2016-03-08 07:59:24 -05:00
import java.util.Objects ;
2012-02-15 01:12:09 -05:00
import java.util.Set ;
2012-01-02 11:59:37 -05:00
import java.util.TreeMap ;
2016-03-08 07:59:24 -05:00
import java.util.function.Function ;
import java.util.function.IntFunction ;
2011-11-14 06:43:22 -05:00
import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
2016-03-29 07:53:21 -04:00
import java.util.stream.IntStream ;
2011-11-14 06:43:22 -05:00
2016-03-12 08:28:04 -05:00
import org.tukaani.xz.XZInputStream ;
2016-08-04 03:05:54 -04:00
import net.filebot.ApplicationFolder ;
2016-03-08 07:59:24 -05:00
import net.filebot.Cache ;
import net.filebot.CacheType ;
import net.filebot.Resource ;
2014-04-19 02:30:29 -04:00
import net.filebot.util.FileUtilities.RegexFileFilter ;
2016-03-12 08:28:04 -05:00
import net.filebot.util.SystemProperty ;
2014-04-19 02:30:29 -04:00
import net.filebot.web.Movie ;
2016-03-26 13:40:54 -04:00
import net.filebot.web.SearchResult ;
2015-05-11 07:42:59 -04:00
import net.filebot.web.SubtitleSearchResult ;
2011-11-14 06:43:22 -05:00
public class ReleaseInfo {
2013-08-10 03:56:11 -04:00
2016-01-28 11:18:01 -05:00
private String [ ] videoSources ;
private Pattern videoSourcePattern ;
2014-06-29 07:04:04 -04:00
public String getVideoSource ( String . . . input ) {
2016-01-28 11:18:01 -05:00
if ( videoSources = = null | | videoSourcePattern = = null ) {
videoSources = PIPE . split ( getProperty ( " pattern.video.source " ) ) ;
videoSourcePattern = getVideoSourcePattern ( ) ;
}
2011-11-14 06:43:22 -05:00
// check parent and itself for group names
2016-01-28 11:18:01 -05:00
return matchLast ( videoSourcePattern , videoSources , input ) ;
2014-06-29 07:04:04 -04:00
}
2016-01-28 11:18:01 -05:00
private Pattern videoTagPattern ;
2014-06-29 07:04:04 -04:00
public List < String > getVideoTags ( String . . . input ) {
2016-01-28 11:18:01 -05:00
if ( videoTagPattern = = null ) {
videoTagPattern = getVideoTagPattern ( ) ;
}
2014-06-29 07:04:04 -04:00
List < String > tags = new ArrayList < String > ( ) ;
for ( String s : input ) {
if ( s = = null )
continue ;
2016-01-28 11:18:01 -05:00
Matcher m = videoTagPattern . matcher ( s ) ;
2014-06-29 07:04:04 -04:00
while ( m . find ( ) ) {
tags . add ( m . group ( ) ) ;
}
}
return tags ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2015-11-30 03:51:42 -05:00
public String getStereoscopic3D ( String . . . input ) {
Pattern pattern = getStereoscopic3DPattern ( ) ;
for ( String s : input ) {
Matcher m = pattern . matcher ( s ) ;
if ( m . find ( ) ) {
return m . group ( ) ;
}
}
return null ;
}
2016-03-29 09:02:49 -04:00
public String getReleaseGroup ( String . . . name ) throws Exception {
2012-06-27 22:36:32 -04:00
// check file and folder for release group names
2016-03-08 07:59:24 -05:00
String [ ] groups = releaseGroup . get ( ) ;
2013-08-10 03:56:11 -04:00
2012-06-27 22:36:32 -04:00
// try case-sensitive match
2016-03-29 09:02:49 -04:00
String match = matchLast ( getReleaseGroupPattern ( true ) , groups , name ) ;
2013-08-10 03:56:11 -04:00
2016-03-20 17:21:08 -04:00
if ( match ! = null ) {
return match ;
2012-06-27 22:36:32 -04:00
}
2013-08-10 03:56:11 -04:00
2016-03-20 17:21:08 -04:00
// try case-insensitive match
2016-03-29 09:02:49 -04:00
return matchLast ( getReleaseGroupPattern ( false ) , groups , name ) ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2016-03-24 07:13:47 -04:00
private Pattern languageTag ;
2016-01-28 11:18:01 -05:00
2016-03-24 11:21:36 -04:00
public Locale getSubtitleLanguageTag ( CharSequence . . . name ) {
2012-01-02 11:59:37 -05:00
// match locale identifier and lookup Locale object
2016-03-24 07:13:47 -04:00
if ( languageTag = = null ) {
2016-09-20 01:10:31 -04:00
languageTag = getSubtitleLanguageTagPattern ( ) ;
2016-01-28 11:18:01 -05:00
}
2016-09-08 04:27:45 -04:00
2016-03-24 07:13:47 -04:00
String lang = matchLast ( languageTag , null , name ) ;
2016-09-20 01:10:31 -04:00
return lang = = null ? null : getDefaultLanguageMap ( ) . get ( lang ) ;
2016-03-24 07:13:47 -04:00
}
2013-08-10 03:56:11 -04:00
2016-03-24 07:13:47 -04:00
private Pattern categoryTag ;
2016-09-20 01:10:31 -04:00
private String [ ] categoryTags ;
2013-08-10 03:56:11 -04:00
2016-03-24 07:13:47 -04:00
public String getSubtitleCategoryTag ( CharSequence . . . name ) {
// match locale identifier and lookup Locale object
2016-09-20 01:10:31 -04:00
if ( categoryTag = = null | | categoryTags = = null ) {
categoryTag = getSubtitleCategoryTagPattern ( ) ;
categoryTags = getSubtitleCategoryTags ( ) ;
2016-03-24 07:13:47 -04:00
}
2016-09-20 01:10:31 -04:00
return matchLast ( categoryTag , categoryTags , name ) ;
2012-01-02 11:59:37 -05:00
}
2013-08-10 03:56:11 -04:00
2016-03-20 17:21:08 -04:00
protected String matchLast ( Pattern pattern , String [ ] paragon , CharSequence . . . sequence ) {
2011-11-14 06:43:22 -05:00
String lastMatch = null ;
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
// match last occurrence
2011-11-14 06:43:22 -05:00
for ( CharSequence name : sequence ) {
if ( name = = null )
continue ;
2013-08-10 03:56:11 -04:00
2011-11-14 06:43:22 -05:00
Matcher matcher = pattern . matcher ( name ) ;
while ( matcher . find ( ) ) {
lastMatch = matcher . group ( ) ;
}
}
2013-08-10 03:56:11 -04:00
2012-01-01 22:48:24 -05:00
// prefer standard value over matched value
2016-03-20 17:21:08 -04:00
if ( lastMatch ! = null & & paragon ! = null ) {
for ( String it : paragon ) {
2016-04-01 13:47:57 -04:00
lastMatch = compile ( " (?<! \\ p{Alnum}) " + quote ( it ) + " (?! \\ p{Alnum}) " , CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS ) . matcher ( lastMatch ) . replaceAll ( it ) ;
2012-01-01 22:48:24 -05:00
}
}
2013-08-10 03:56:11 -04:00
2011-11-14 06:43:22 -05:00
return lastMatch ;
}
2013-08-10 03:56:11 -04:00
2012-07-24 16:01:48 -04:00
// cached patterns
2016-03-12 09:09:14 -05:00
private final Pattern [ ] [ ] stopwords = new Pattern [ 2 ] [ ] ;
private final Pattern [ ] [ ] blacklist = new Pattern [ 2 ] [ ] ;
2013-08-10 03:56:11 -04:00
2016-03-08 07:59:24 -05:00
public List < String > cleanRelease ( Collection < String > items , boolean strict ) throws Exception {
2016-03-12 09:09:14 -05:00
int b = strict ? 1 : 0 ;
2013-08-10 03:56:11 -04:00
2012-07-24 16:01:48 -04:00
// initialize cached patterns
2016-03-12 09:09:14 -05:00
if ( stopwords [ b ] = = null | | blacklist [ b ] = = null ) {
Pattern clutterBracket = getClutterBracketPattern ( strict ) ;
Pattern releaseGroup = getReleaseGroupPattern ( strict ) ;
2016-03-12 10:19:27 -05:00
Pattern releaseGroupTrim = getReleaseGroupTrimPattern ( ) ;
2016-09-20 01:10:31 -04:00
Pattern languageSuffix = getSubtitleLanguageTagPattern ( ) ;
Pattern languageTag = getLanguageTagPattern ( strict ) ;
2016-03-12 09:09:14 -05:00
Pattern videoSource = getVideoSourcePattern ( ) ;
Pattern videoTags = getVideoTagPattern ( ) ;
Pattern videoFormat = getVideoFormatPattern ( strict ) ;
Pattern stereoscopic3d = getStereoscopic3DPattern ( ) ;
Pattern resolution = getResolutionPattern ( ) ;
Pattern queryBlacklist = getBlacklistPattern ( ) ;
2016-03-29 09:02:49 -04:00
stopwords [ b ] = new Pattern [ ] { languageSuffix , languageTag , videoSource , videoTags , videoFormat , resolution , stereoscopic3d } ;
2016-04-05 14:05:59 -04:00
blacklist [ b ] = new Pattern [ ] { EMBEDDED_CHECKSUM , languageSuffix , releaseGroupTrim , queryBlacklist , languageTag , clutterBracket , releaseGroup , videoSource , videoTags , videoFormat , resolution , stereoscopic3d } ;
2011-11-26 04:50:31 -05:00
}
2013-08-10 03:56:11 -04:00
2016-03-12 09:09:14 -05:00
return items . stream ( ) . map ( it - > {
2016-03-29 07:53:21 -04:00
String head = strict ? clean ( it , stopwords [ b ] ) : substringBefore ( it , stopwords [ b ] ) ;
String norm = normalizePunctuation ( clean ( head , blacklist [ b ] ) ) ;
// debug.finest(format("CLEAN: %s => %s => %s", it, head, norm));
return norm ;
2016-03-12 09:09:14 -05:00
} ) . filter ( s - > s . length ( ) > 0 ) . collect ( toList ( ) ) ;
2011-11-26 04:50:31 -05:00
}
2013-08-10 03:56:11 -04:00
2011-11-26 04:50:31 -05:00
public String clean ( String item , Pattern . . . blacklisted ) {
for ( Pattern it : blacklisted ) {
item = it . matcher ( item ) . replaceAll ( " " ) ;
2011-11-14 06:43:22 -05:00
}
2013-04-01 06:17:20 -04:00
return item ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2012-02-23 13:48:35 -05:00
public String substringBefore ( String item , Pattern . . . stopwords ) {
for ( Pattern it : stopwords ) {
Matcher matcher = it . matcher ( item ) ;
if ( matcher . find ( ) ) {
2012-07-04 06:20:52 -04:00
String substring = item . substring ( 0 , matcher . start ( ) ) ; // use substring before the matched stopword
if ( normalizePunctuation ( substring ) . length ( ) > = 3 ) {
item = substring ; // make sure that the substring has enough data
}
2012-02-23 13:48:35 -05:00
}
}
return item ;
}
2013-08-10 03:56:11 -04:00
2013-11-16 00:37:41 -05:00
// cached patterns
2013-11-16 07:10:23 -05:00
private Set < File > volumeRoots ;
2013-11-16 00:37:41 -05:00
private Pattern structureRootFolderPattern ;
2013-11-16 07:10:23 -05:00
public Set < File > getVolumeRoots ( ) {
if ( volumeRoots = = null ) {
Set < File > volumes = new HashSet < File > ( ) ;
2016-08-04 03:05:54 -04:00
File home = ApplicationFolder . UserHome . get ( ) ;
2016-04-13 14:41:06 -04:00
List < File > roots = getFileSystemRoots ( ) ;
2013-11-16 07:10:23 -05:00
// user root folder
2016-08-04 03:05:54 -04:00
volumes . add ( home ) ;
2013-11-16 07:10:23 -05:00
// Windows / Linux / Mac system roots
2016-04-13 14:41:06 -04:00
volumes . addAll ( roots ) ;
2013-11-16 07:10:23 -05:00
2016-04-13 07:21:55 -04:00
// Linux / Mac
2013-11-16 07:10:23 -05:00
if ( File . separator . equals ( " / " ) ) {
2014-01-06 13:15:37 -05:00
// Linux and Mac system root folders
2016-04-13 14:41:06 -04:00
for ( File root : roots ) {
volumes . addAll ( getChildren ( root , FOLDERS ) ) ;
2013-11-16 07:10:23 -05:00
}
2014-01-06 13:15:37 -05:00
2014-08-10 10:47:47 -04:00
for ( File mediaRoot : getMediaRoots ( ) ) {
volumes . addAll ( getChildren ( mediaRoot , FOLDERS ) ) ;
volumes . add ( mediaRoot ) ;
2016-04-13 07:21:55 -04:00
}
}
// Mac
if ( isMacSandbox ( ) ) {
2016-08-04 03:05:54 -04:00
// e.g. ignore default Movie folder (user.home and real user home are different in the sandbox environment)
for ( File userFolder : getChildren ( new File ( System . getProperty ( " user.home " ) ) , FOLDERS ) ) {
volumes . add ( new File ( home , userFolder . getName ( ) ) ) ;
2013-11-16 07:10:23 -05:00
}
2016-08-04 03:05:54 -04:00
} else {
volumes . addAll ( getChildren ( home , FOLDERS ) ) ;
2013-11-16 07:10:23 -05:00
}
2014-01-06 13:15:37 -05:00
2013-12-13 23:11:44 -05:00
volumeRoots = unmodifiableSet ( volumes ) ;
2013-11-16 07:10:23 -05:00
}
return volumeRoots ;
}
2016-03-08 07:59:24 -05:00
public Pattern getStructureRootPattern ( ) throws Exception {
2013-11-16 00:37:41 -05:00
if ( structureRootFolderPattern = = null ) {
List < String > folders = new ArrayList < String > ( ) ;
2016-03-08 07:59:24 -05:00
for ( String it : queryBlacklist . get ( ) ) {
2014-01-11 04:04:49 -05:00
if ( it . startsWith ( " ^ " ) & & it . endsWith ( " $ " ) ) {
2013-11-16 00:37:41 -05:00
folders . add ( it ) ;
}
}
2016-03-30 17:42:56 -04:00
structureRootFolderPattern = compile ( or ( folders . toArray ( ) ) , CASE_INSENSITIVE ) ;
2013-11-16 00:37:41 -05:00
}
return structureRootFolderPattern ;
}
2016-09-20 01:10:31 -04:00
public Pattern getLanguageTagPattern ( boolean strict ) {
2012-02-15 01:12:09 -05:00
// [en]
2016-03-27 18:16:23 -04:00
if ( strict ) {
2016-09-20 01:10:31 -04:00
return compile ( " (?<=[- \\ [ \\ { \\ (]) " + or ( quoteAll ( getDefaultLanguageMap ( ) . keySet ( ) ) ) + " (?=[- \\ ] \\ } \\ )]|$) " , CASE_INSENSITIVE ) ;
2016-03-27 18:16:23 -04:00
}
// FR
2016-09-20 01:10:31 -04:00
List < String > allCapsLanguageTags = getDefaultLanguageMap ( ) . keySet ( ) . stream ( ) . map ( String : : toUpperCase ) . collect ( toList ( ) ) ;
2016-03-27 18:16:23 -04:00
return compile ( " (?<! \\ p{Alnum}) " + or ( quoteAll ( allCapsLanguageTags ) ) + " (?! \\ p{Alnum}) " ) ;
2012-01-02 11:59:37 -05:00
}
2013-08-10 03:56:11 -04:00
2016-09-20 01:10:31 -04:00
public Pattern getSubtitleCategoryTagPattern ( ) {
2016-03-24 07:13:47 -04:00
// e.g. ".en.srt" or ".en.forced.srt"
2016-09-20 01:10:31 -04:00
return compile ( " (?<=[._-]( " + or ( quoteAll ( getDefaultLanguageMap ( ) . keySet ( ) ) ) + " )[._-]) " + or ( getSubtitleCategoryTags ( ) ) + " $ " , CASE_INSENSITIVE ) ;
2016-03-24 07:13:47 -04:00
}
2016-09-20 01:10:31 -04:00
public Pattern getSubtitleLanguageTagPattern ( ) {
2014-11-11 13:26:17 -05:00
// e.g. ".en.srt" or ".en.forced.srt"
2016-09-20 01:10:31 -04:00
return compile ( " (?<=[._-]) " + or ( quoteAll ( getDefaultLanguageMap ( ) . keySet ( ) ) ) + " (?=([._-] " + or ( getSubtitleCategoryTags ( ) ) + " )?$) " , CASE_INSENSITIVE ) ;
2011-12-30 10:34:02 -05:00
}
2013-08-10 03:56:11 -04:00
2011-12-30 10:34:02 -05:00
/**
 * Builds the pattern for screen resolutions such as 640x480 or 1280x720.
 *
 * @return pattern matching WIDTHxHEIGHT that is not adjacent to other alphanumeric characters, where the width
 *         is 4 digits or 600-999 and the height is 4 digits or 400-999
 */
public Pattern getResolutionPattern() {
    String width = "(\\d{4}|[6-9]\\d{2})";
    String height = "(\\d{4}|[4-9]\\d{2})";
    return compile("(?<!\\p{Alnum})" + width + "x" + height + "(?!\\p{Alnum})");
}
2013-08-10 03:56:11 -04:00
2013-11-27 13:49:15 -05:00
public Pattern getVideoFormatPattern ( boolean strict ) {
2011-11-14 06:43:22 -05:00
// pattern matching any video source name
2014-11-09 02:40:01 -05:00
String pattern = getProperty ( " pattern.video.format " ) ;
2016-10-24 11:43:20 -04:00
return strict ? compileWordPattern ( pattern ) : compile ( pattern , CASE_INSENSITIVE ) ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2011-11-14 06:43:22 -05:00
public Pattern getVideoSourcePattern ( ) {
2016-10-24 11:43:20 -04:00
return compileWordPattern ( getProperty ( " pattern.video.source " ) ) ; // pattern matching any video source name, like BluRay
2014-06-29 07:04:04 -04:00
}
public Pattern getVideoTagPattern ( ) {
2016-10-24 11:43:20 -04:00
return compileWordPattern ( getProperty ( " pattern.video.tags " ) ) ; // pattern matching any video tag, like Directors Cut
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2015-11-30 03:51:42 -05:00
public Pattern getStereoscopic3DPattern ( ) {
2016-10-24 11:43:20 -04:00
return compileWordPattern ( getProperty ( " pattern.video.s3d " ) ) ; // pattern matching any 3D flags like 3D.HSBS
}
public Pattern getRepackPattern ( ) {
return compileWordPattern ( getProperty ( " pattern.video.repack " ) ) ;
2015-11-30 03:51:42 -05:00
}
2012-06-22 03:47:26 -04:00
public Pattern getClutterBracketPattern ( boolean strict ) {
// match patterns like [Action, Drama] or {ENG-XViD-MP3-DVDRiP} etc
2016-03-29 07:53:21 -04:00
String brackets = " ()[]{} " ;
String contains = strict ? " [[^a-z0-9]&&[^ " + quote ( brackets ) + " ]] " : " \\ p{Alpha} " ;
return IntStream . range ( 0 , brackets . length ( ) / 2 ) . map ( i - > i * 2 ) . mapToObj ( i - > {
String open = quote ( brackets . substring ( i , i + 1 ) ) ;
String close = quote ( brackets . substring ( i + 1 , i + 2 ) ) ;
String notOpenClose = " [^ " + open + close + " ]+? " ;
return open + " ( " + notOpenClose + contains + notOpenClose + " ) " + close ;
} ) . collect ( collectingAndThen ( joining ( " | " ) , pattern - > compile ( pattern , CASE_INSENSITIVE ) ) ) ;
2012-06-22 03:47:26 -04:00
}
2013-08-10 03:56:11 -04:00
2016-03-08 07:59:24 -05:00
public Pattern getReleaseGroupPattern ( boolean strict ) throws Exception {
2016-03-29 09:02:49 -04:00
// match 1..N group patterns
String group = " ((?<! \\ p{Alnum}) " + or ( releaseGroup . get ( ) ) + " (?! \\ p{Alnum})[ \\ p{Punct}]??)+ " ;
// group pattern at beginning or ending of the string
String [ ] groupHeadTail = { " (?<=^[^ \\ p{Alnum}]*) " + group , group + " (?=[ \\ p{Alpha} \\ p{Punct}]*$) " } ;
2016-03-30 17:42:56 -04:00
return compile ( or ( groupHeadTail ) , strict ? 0 : CASE_INSENSITIVE ) ;
2011-11-14 06:43:22 -05:00
}
2013-08-10 03:56:11 -04:00
2016-03-12 10:19:27 -05:00
public Pattern getReleaseGroupTrimPattern ( ) throws Exception {
// pattern matching any release group name enclosed in specific separators or at the start/end
2016-03-30 17:42:56 -04:00
return compile ( " (?<= \\ [| \\ (|^) " + or ( releaseGroup . get ( ) ) + " (?= \\ ]| \\ )| \\ -)|(?<= \\ [| \\ (| \\ -) " + or ( releaseGroup . get ( ) ) + " (?= \\ ]| \\ )|$) " , CASE_INSENSITIVE ) ;
2016-03-12 10:19:27 -05:00
}
2016-03-08 07:59:24 -05:00
public Pattern getBlacklistPattern ( ) throws Exception {
2016-10-24 11:43:20 -04:00
return compileWordPattern ( queryBlacklist . get ( ) ) ; // pattern matching any release group name enclosed in separators
2012-07-26 04:45:15 -04:00
}
2013-08-10 03:56:11 -04:00
2016-03-08 07:59:24 -05:00
public Pattern getExcludePattern ( ) throws Exception {
2016-10-24 11:43:20 -04:00
return compileWordPattern ( excludeBlacklist . get ( ) ) ; // pattern matching any release group name enclosed in separators
2012-01-01 22:48:24 -05:00
}
2013-08-10 03:56:11 -04:00
2014-02-19 15:28:00 -05:00
public Pattern getCustomRemovePattern ( Collection < String > terms ) throws IOException {
2016-10-24 11:43:20 -04:00
return compileWordPattern ( quoteAll ( terms ) ) ;
}
/**
 * Compiles a case-insensitive pattern that matches any of the given patterns as a whole word,
 * i.e. not directly preceded or followed by an alphanumeric character.
 *
 * @param patterns alternatives, joined via or(...)
 */
private Pattern compileWordPattern(String[] patterns) {
    String alternatives = or(patterns); // use | to join patterns
    return compile("(?<!\\p{Alnum})" + alternatives + "(?!\\p{Alnum})", CASE_INSENSITIVE);
}
private Pattern compileWordPattern ( String pattern ) {
return compile ( " (?<! \\ p{Alnum})( " + pattern + " )(?! \\ p{Alnum}) " , CASE_INSENSITIVE ) ;
2014-02-19 15:28:00 -05:00
}
2016-03-08 09:41:30 -05:00
public Map < Pattern , String > getSeriesMappings ( ) throws Exception {
return seriesMappings . get ( ) ;
2011-12-30 16:42:25 -05:00
}
2013-08-10 03:56:11 -04:00
2016-03-26 13:40:54 -04:00
public SearchResult [ ] getTheTVDBIndex ( ) throws Exception {
2016-03-08 07:59:24 -05:00
return tvdbIndex . get ( ) ;
2013-03-17 10:19:11 -04:00
}
2013-08-10 03:56:11 -04:00
2016-03-26 13:40:54 -04:00
public SearchResult [ ] getAnidbIndex ( ) throws Exception {
2016-03-08 07:59:24 -05:00
return anidbIndex . get ( ) ;
2012-10-14 07:57:25 -04:00
}
2013-08-10 03:56:11 -04:00
2016-03-08 09:41:30 -05:00
public Movie [ ] getMovieList ( ) throws Exception {
return movieIndex . get ( ) ;
2015-05-11 05:13:35 -04:00
}
2016-03-08 09:41:30 -05:00
public SubtitleSearchResult [ ] getOpenSubtitlesIndex ( ) throws Exception {
return osdbIndex . get ( ) ;
2012-10-09 11:04:14 -04:00
}
2013-08-10 03:56:11 -04:00
2016-01-28 11:18:01 -05:00
private static FolderEntryFilter diskFolderFilter ;
2012-02-10 11:43:09 -05:00
public FileFilter getDiskFolderFilter ( ) {
2016-01-28 11:18:01 -05:00
if ( diskFolderFilter = = null ) {
diskFolderFilter = new FolderEntryFilter ( compile ( getProperty ( " pattern.diskfolder.entry " ) ) ) ;
}
return diskFolderFilter ;
2012-02-10 11:43:09 -05:00
}
2013-08-10 03:56:11 -04:00
2016-01-28 11:18:01 -05:00
private static RegexFileFilter diskFolderEntryFilter ;
2013-09-17 13:32:38 -04:00
public FileFilter getDiskFolderEntryFilter ( ) {
2016-01-28 11:18:01 -05:00
if ( diskFolderEntryFilter = = null ) {
diskFolderEntryFilter = new RegexFileFilter ( compile ( getProperty ( " pattern.diskfolder.entry " ) ) ) ;
}
return diskFolderEntryFilter ;
2013-09-17 13:32:38 -04:00
}
2016-01-28 11:18:01 -05:00
private static ClutterFileFilter clutterFileFilter ;
2016-03-08 07:59:24 -05:00
public FileFilter getClutterFileFilter ( ) throws Exception {
2016-01-28 11:18:01 -05:00
if ( clutterFileFilter = = null ) {
clutterFileFilter = new ClutterFileFilter ( getExcludePattern ( ) , Long . parseLong ( getProperty ( " number.clutter.maxfilesize " ) ) ) ; // only files smaller than 250 MB may be considered clutter
}
return clutterFileFilter ;
2012-06-15 06:45:35 -04:00
}
2013-08-10 03:56:11 -04:00
2016-04-05 14:05:44 -04:00
private static RegexFileFilter systemFilesFilter ;
public FileFilter getSystemFilesFilter ( ) {
if ( systemFilesFilter = = null ) {
systemFilesFilter = new RegexFileFilter ( compile ( getProperty ( " pattern.system.files " ) , CASE_INSENSITIVE ) ) ;
}
return systemFilesFilter ;
}
2014-01-06 13:15:37 -05:00
public List < File > getMediaRoots ( ) {
2016-04-02 05:07:10 -04:00
String roots = getProperty ( " folder.media.roots " ) ;
return COMMA . splitAsStream ( roots ) . map ( File : : new ) . collect ( toList ( ) ) ;
2014-01-06 13:15:37 -05:00
}
2016-03-24 07:13:47 -04:00
public String [ ] getSubtitleCategoryTags ( ) {
2016-04-02 05:07:10 -04:00
String tags = getProperty ( " pattern.subtitle.tags " ) ;
return PIPE . split ( tags ) ;
2016-03-24 07:13:47 -04:00
}
2016-04-02 05:07:10 -04:00
private final Resource < Map < Pattern , String > > seriesMappings = resource ( " url.series-mappings " , Cache . ONE_WEEK , Function . identity ( ) , String [ ] : : new ) . transform ( lines - > {
2016-03-08 09:41:30 -05:00
Map < Pattern , String > map = new LinkedHashMap < Pattern , String > ( lines . length ) ;
2016-04-02 05:07:10 -04:00
stream ( lines ) . map ( s - > TAB . split ( s , 2 ) ) . filter ( v - > v . length = = 2 ) . forEach ( v - > {
2016-03-30 17:42:56 -04:00
Pattern pattern = compile ( " (?<! \\ p{Alnum})( " + v [ 0 ] + " )(?! \\ p{Alnum}) " , CASE_INSENSITIVE ) ;
2016-03-08 09:41:30 -05:00
map . put ( pattern , v [ 1 ] ) ;
} ) ;
return unmodifiableMap ( map ) ;
} ) . memoize ( ) ;
2016-03-08 07:59:24 -05:00
2016-04-02 05:07:10 -04:00
private final Resource < String [ ] > releaseGroup = lines ( " url.release-groups " , Cache . ONE_WEEK ) ;
private final Resource < String [ ] > queryBlacklist = lines ( " url.query-blacklist " , Cache . ONE_WEEK ) ;
private final Resource < String [ ] > excludeBlacklist = lines ( " url.exclude-blacklist " , Cache . ONE_WEEK ) ;
2016-03-08 07:59:24 -05:00
2016-04-02 05:07:10 -04:00
private final Resource < SearchResult [ ] > tvdbIndex = tsv ( " url.thetvdb-index " , Cache . ONE_WEEK , this : : parseSeries , SearchResult [ ] : : new ) ;
private final Resource < SearchResult [ ] > anidbIndex = tsv ( " url.anidb-index " , Cache . ONE_WEEK , this : : parseSeries , SearchResult [ ] : : new ) ;
2016-03-08 07:59:24 -05:00
2016-04-02 05:07:10 -04:00
private final Resource < Movie [ ] > movieIndex = tsv ( " url.movie-list " , Cache . ONE_MONTH , this : : parseMovie , Movie [ ] : : new ) ;
private final Resource < SubtitleSearchResult [ ] > osdbIndex = tsv ( " url.osdb-index " , Cache . ONE_MONTH , this : : parseSubtitle , SubtitleSearchResult [ ] : : new ) ;
2016-03-08 07:59:24 -05:00
2016-10-17 14:22:07 -04:00
private final SystemProperty < Duration > refreshDuration = SystemProperty . of ( " url.refresh " , Duration : : parse ) ;
2016-03-12 08:28:04 -05:00
2016-03-26 13:40:54 -04:00
private SearchResult parseSeries ( String [ ] v ) {
2016-03-08 07:59:24 -05:00
int id = parseInt ( v [ 0 ] ) ;
String name = v [ 1 ] ;
String [ ] aliasNames = copyOfRange ( v , 2 , v . length ) ;
2016-03-26 13:40:54 -04:00
return new SearchResult ( id , name , aliasNames ) ;
2016-03-08 07:59:24 -05:00
}
2016-03-08 09:41:30 -05:00
/**
 * Parses one movie row: columns 0-2 = imdb id, tmdb id, year; column 3 = name; remaining columns = alias names.
 * Ids that are not positive are passed on as -1.
 */
private Movie parseMovie(String[] v) {
    int imdbid = parseInt(v[0]);
    int tmdbid = parseInt(v[1]);
    int year = parseInt(v[2]);

    if (imdbid <= 0) {
        imdbid = -1;
    }
    if (tmdbid <= 0) {
        tmdbid = -1;
    }

    return new Movie(v[3], copyOfRange(v, 4, v.length), year, imdbid, tmdbid, null);
}
2016-03-08 07:59:24 -05:00
/**
 * Parses one subtitle index row: column 0 = kind, columns 1-3 = score, imdb id, year; column 4 = name;
 * remaining columns = alias names.
 */
private SubtitleSearchResult parseSubtitle(String[] v) {
    SubtitleSearchResult.Kind kind = SubtitleSearchResult.Kind.forName(v[0]);
    int score = parseInt(v[1]);
    int imdbId = parseInt(v[2]);
    int year = parseInt(v[3]);
    String[] aliasNames = copyOfRange(v, 5, v.length);

    return new SubtitleSearchResult(v[4], aliasNames, year, imdbId, -1, Locale.ENGLISH, kind, score);
}
2016-03-08 09:41:30 -05:00
protected Resource < String [ ] > lines ( String name , Duration expirationTime ) {
2016-03-08 09:51:53 -05:00
return resource ( name , expirationTime , Function . identity ( ) , String [ ] : : new ) . memoize ( ) ;
2016-03-08 07:59:24 -05:00
}
2016-03-08 09:41:30 -05:00
protected < A > Resource < A [ ] > tsv ( String name , Duration expirationTime , Function < String [ ] , A > parse , IntFunction < A [ ] > generator ) {
2016-04-02 05:07:10 -04:00
return resource ( name , expirationTime , s - > parse . apply ( TAB . split ( s ) ) , generator ) . memoize ( ) ;
2016-03-08 07:59:24 -05:00
}
2016-03-08 09:41:30 -05:00
protected < A > Resource < A [ ] > resource ( String name , Duration expirationTime , Function < String , A > parse , IntFunction < A [ ] > generator ) {
2016-03-08 07:59:24 -05:00
return ( ) - > {
Cache cache = Cache . getCache ( " data " , CacheType . Persistent ) ;
2016-10-17 14:22:07 -04:00
byte [ ] bytes = cache . bytes ( name , n - > new URL ( getProperty ( n ) ) ) . expire ( refreshDuration . optional ( ) . orElse ( expirationTime ) ) . get ( ) ;
2016-03-08 07:59:24 -05:00
// all data file are xz compressed
try ( BufferedReader text = new BufferedReader ( new InputStreamReader ( new XZInputStream ( new ByteArrayInputStream ( bytes ) ) , UTF_8 ) ) ) {
2016-03-08 09:51:53 -05:00
return text . lines ( ) . filter ( s - > s . length ( ) > 0 ) . map ( parse ) . filter ( Objects : : nonNull ) . toArray ( generator ) ;
2016-03-08 07:59:24 -05:00
}
} ;
}
protected String getProperty ( String name ) {
// override resource locations via Java System properties
return System . getProperty ( name , getBundle ( ReleaseInfo . class . getName ( ) ) . getString ( name ) ) ;
2014-11-09 02:40:01 -05:00
}
2013-08-10 03:56:11 -04:00
2016-03-08 08:06:07 -05:00
public static class FolderEntryFilter implements FileFilter {
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
private final Pattern entryPattern ;
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
public FolderEntryFilter ( Pattern entryPattern ) {
this . entryPattern = entryPattern ;
}
2013-08-10 03:56:11 -04:00
2012-02-10 11:43:09 -05:00
@Override
public boolean accept ( File dir ) {
2016-10-06 12:56:52 -04:00
return getChildren ( dir , f - > entryPattern . matcher ( f . getName ( ) ) . matches ( ) ) . size ( ) > 0 ;
2012-02-10 11:43:09 -05:00
}
}
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
public static class FileFolderNameFilter implements FileFilter {
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
private final Pattern namePattern ;
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
public FileFolderNameFilter ( Pattern namePattern ) {
this . namePattern = namePattern ;
}
2013-08-10 03:56:11 -04:00
2012-06-15 06:45:35 -04:00
@Override
public boolean accept ( File file ) {
return ( namePattern . matcher ( file . getName ( ) ) . find ( ) | | ( file . isFile ( ) & & namePattern . matcher ( file . getParentFile ( ) . getName ( ) ) . find ( ) ) ) ;
}
}
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
public static class ClutterFileFilter extends FileFolderNameFilter {
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
private long maxFileSize ;
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
public ClutterFileFilter ( Pattern namePattern , long maxFileSize ) {
super ( namePattern ) ;
this . maxFileSize = maxFileSize ;
}
2013-08-10 03:56:11 -04:00
2013-03-28 05:04:35 -04:00
@Override
public boolean accept ( File file ) {
return super . accept ( file ) & & file . isFile ( ) & & file . length ( ) < maxFileSize ;
}
}
2013-08-10 03:56:11 -04:00
2015-05-20 03:35:45 -04:00
private String or ( Object [ ] terms ) {
2016-03-09 00:58:44 -05:00
return join ( stream ( terms ) . sorted ( reverseOrder ( ) ) , " | " , " ( " , " ) " ) ; // non-capturing group that matches the longest occurrence
2015-05-20 03:35:45 -04:00
}
private String [ ] quoteAll ( Collection < String > values ) {
return values . stream ( ) . map ( ( s ) - > Pattern . quote ( s ) ) . toArray ( String [ ] : : new ) ;
2012-02-15 01:12:09 -05:00
}
2013-08-10 03:56:11 -04:00
2016-03-24 07:13:47 -04:00
private Map < String , Locale > defaultLanguageMap ;
public Map < String , Locale > getDefaultLanguageMap ( ) {
if ( defaultLanguageMap = = null ) {
defaultLanguageMap = getLanguageMap ( Locale . ENGLISH , Locale . getDefault ( ) ) ;
}
return defaultLanguageMap ;
}
public Map < String , Locale > getLanguageMap ( Locale . . . displayLanguages ) {
// unique
displayLanguages = stream ( displayLanguages ) . distinct ( ) . toArray ( Locale [ ] : : new ) ;
2012-02-15 01:12:09 -05:00
// use maximum strength collator by default
2016-02-10 09:31:53 -05:00
Collator collator = Collator . getInstance ( Locale . ENGLISH ) ;
2012-02-15 01:12:09 -05:00
collator . setDecomposition ( Collator . FULL_DECOMPOSITION ) ;
collator . setStrength ( Collator . PRIMARY ) ;
2013-08-10 03:56:11 -04:00
2016-03-12 10:19:27 -05:00
Map < String , Locale > languageMap = new TreeMap < String , Locale > ( collator ) ;
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
for ( String code : Locale . getISOLanguages ( ) ) {
2013-12-13 21:22:31 -05:00
Locale locale = new Locale ( code ) ; // force ISO3 language as default toString() value
Locale iso3locale = new Locale ( locale . getISO3Language ( ) ) ;
languageMap . put ( locale . getLanguage ( ) , iso3locale ) ;
languageMap . put ( locale . getISO3Language ( ) , iso3locale ) ;
2013-08-10 03:56:11 -04:00
2012-02-15 01:12:09 -05:00
// map display language names for given locales
2016-03-24 07:13:47 -04:00
for ( Locale language : displayLanguages ) {
2012-02-15 01:12:09 -05:00
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
String languageName = Normalizer . normalize ( locale . getDisplayLanguage ( language ) , Form . NFKD ) ;
2013-12-13 21:22:31 -05:00
languageMap . put ( languageName . toLowerCase ( ) , iso3locale ) ;
2012-02-15 01:12:09 -05:00
}
}
2013-08-10 03:56:11 -04:00
2013-12-13 21:22:31 -05:00
// unofficial language for pb/pob for Portuguese (Brazil)
Locale brazil = new Locale ( " pob " ) ;
2014-12-03 03:45:33 -05:00
languageMap . put ( " brazilian " , brazil ) ;
2013-12-13 21:22:31 -05:00
languageMap . put ( " pb " , brazil ) ;
languageMap . put ( " pob " , brazil ) ;
2014-06-11 11:03:58 -04:00
// missing ISO 639-2 (B/T) locales (see https://github.com/TakahikoKawasaki/nv-i18n/blob/master/src/main/java/com/neovisionaries/i18n/LanguageAlpha3Code.java)
languageMap . put ( " tib " , new Locale ( " bod " ) ) ;
languageMap . put ( " cze " , new Locale ( " ces " ) ) ;
languageMap . put ( " wel " , new Locale ( " cym " ) ) ;
languageMap . put ( " ger " , new Locale ( " deu " ) ) ;
languageMap . put ( " gre " , new Locale ( " ell " ) ) ;
languageMap . put ( " baq " , new Locale ( " eus " ) ) ;
languageMap . put ( " per " , new Locale ( " fas " ) ) ;
languageMap . put ( " fre " , new Locale ( " fra " ) ) ;
languageMap . put ( " arm " , new Locale ( " hye " ) ) ;
languageMap . put ( " ice " , new Locale ( " isl " ) ) ;
languageMap . put ( " geo " , new Locale ( " kat " ) ) ;
languageMap . put ( " mac " , new Locale ( " mkd " ) ) ;
languageMap . put ( " mao " , new Locale ( " mri " ) ) ;
languageMap . put ( " may " , new Locale ( " msa " ) ) ;
languageMap . put ( " bur " , new Locale ( " mya " ) ) ;
languageMap . put ( " dut " , new Locale ( " nld " ) ) ;
languageMap . put ( " rum " , new Locale ( " ron " ) ) ;
languageMap . put ( " slo " , new Locale ( " slk " ) ) ;
languageMap . put ( " alb " , new Locale ( " sqi " ) ) ;
languageMap . put ( " chi " , new Locale ( " zho " ) ) ;
2014-05-09 16:27:18 -04:00
2012-02-15 01:12:09 -05:00
// remove illegal tokens
languageMap . remove ( " " ) ;
2012-06-22 03:47:26 -04:00
languageMap . remove ( " II " ) ;
languageMap . remove ( " III " ) ;
2016-02-25 07:16:18 -05:00
languageMap . remove ( " hi " ) ; // hi => typically used for hearing-impaired subtitles, NOT hindi language
2013-08-10 03:56:11 -04:00
2016-02-25 07:16:18 -05:00
return unmodifiableMap ( languageMap ) ;
2012-02-15 01:12:09 -05:00
}
2014-11-09 02:40:01 -05:00
2011-11-14 06:43:22 -05:00
}