* support for query term blacklist

This commit is contained in:
Reinhard Pointner 2011-12-30 21:42:25 +00:00
parent c9a956cbe2
commit 2734278249
4 changed files with 62 additions and 10 deletions

View File

@ -3,7 +3,6 @@ package net.sourceforge.filebot.media;
import static java.util.ResourceBundle.*;
import static java.util.concurrent.TimeUnit.*;
import static java.util.regex.Pattern.*;
import static net.sourceforge.tuned.StringUtilities.*;
@ -54,12 +53,12 @@ public class ReleaseInfo {
/**
 * Passes the given items to {@code clean(...)} together with the release info patterns
 * obtained from the pattern getters of this class.
 *
 * NOTE(review): two return statements appear back to back below; presumably they are the
 * removed and the added line of this diff hunk. The second one passes
 * getVideoFormatPattern() in place of getCodecPattern() and additionally passes
 * getBlacklistPattern().
 *
 * @param items the strings handed to {@code clean(...)}
 * @return the result of {@code clean(...)} for the given items and patterns
 * @throws IOException declared here; presumably raised when a pattern resource cannot be loaded -- confirm
 */
public List<String> cleanRelease(Iterable<String> items) throws IOException {
return clean(items, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getCodecPattern(), getResolutionPattern());
return clean(items, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern());
}
/**
 * Single-string variant: passes the given item to {@code clean(...)} together with the
 * release info patterns obtained from the pattern getters of this class.
 *
 * NOTE(review): two return statements appear back to back below; presumably they are the
 * removed and the added line of this diff hunk. The second one passes
 * getVideoFormatPattern() in place of getCodecPattern() and additionally passes
 * getBlacklistPattern().
 *
 * @param item the string handed to {@code clean(...)}
 * @return the result of {@code clean(...)} for the given item and patterns
 * @throws IOException declared here; presumably raised when a pattern resource cannot be loaded -- confirm
 */
public String cleanRelease(String item) throws IOException {
return clean(item, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getCodecPattern(), getResolutionPattern());
return clean(item, getReleaseGroupPattern(), getLanguageSuffixPattern(), getVideoSourcePattern(), getVideoFormatPattern(), getResolutionPattern(), getBlacklistPattern());
}
@ -106,9 +105,9 @@ public class ReleaseInfo {
}
// NOTE(review): the two signature lines and the two bundle lookups below are presumably the
// removed/added pairs of this diff hunk (getCodecPattern / "pattern.codec" renamed to
// getVideoFormatPattern / "pattern.video.format").
public Pattern getCodecPattern() {
public Pattern getVideoFormatPattern() {
// pattern matching any video format term listed in this class's resource bundle
String pattern = getBundle(getClass().getName()).getString("pattern.codec");
String pattern = getBundle(getClass().getName()).getString("pattern.video.format");
// the lookbehind/lookahead reject matches that directly touch a \p{Alnum} character,
// so a term only matches when it is not embedded in a longer alphanumeric run
return compile("(?<!\\p{Alnum})(" + pattern + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
}
@ -126,13 +125,28 @@ public class ReleaseInfo {
}
/**
 * Builds a case-insensitive pattern from the entries of {@code blacklistResource}.
 *
 * The entries are joined with "|" without any quoting, so each entry is interpreted as a
 * regular expression fragment rather than as a literal string.
 *
 * @return pattern matching any blacklisted term that is not directly preceded or followed
 *         by a \p{Alnum} character
 * @throws IOException declared here; presumably raised when the blacklist resource cannot be fetched -- confirm
 */
public Pattern getBlacklistPattern() throws IOException {
// pattern matching any blacklisted term that is not embedded in a longer alphanumeric run
return compile("(?<!\\p{Alnum})(" + join(blacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE);
}
// release group names and blacklisted terms are loaded from the URLs configured in the
// resource bundle; both resources are created with a 24h update interval
// NOTE(review): the anonymous CachedResource line and the first PatternResource line below
// both declare releaseGroupResource; presumably they are the removed/added lines of this diff hunk.
protected final CachedResource<String[]> releaseGroupResource = new CachedResource<String[]>(getBundle(getClass().getName()).getString("url.release-groups"), String[].class, DAYS.toMillis(1)) {
protected final PatternResource releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
protected final PatternResource blacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.term-blacklist"));
// cached resource whose raw data is decoded as UTF-8 text and split into a String[] of terms
protected static class PatternResource extends CachedResource<String[]> {
public PatternResource(String resource) {
super(resource, String[].class, 24 * 60 * 60 * 1000); // 24h update interval
}
@Override
public String[] process(ByteBuffer data) {
// NOTE(review): the two return lines are presumably the removed/added lines of the diff;
// the first splits on any whitespace run, the second only on "\n", which keeps spaces
// inside an entry but would also keep a trailing "\r" if the data uses CRLF line
// endings -- confirm the format of the hosted text files
return compile("\\s+").split(Charset.forName("UTF-8").decode(data));
return compile("\\n").split(Charset.forName("UTF-8").decode(data));
}
};
}
}

View File

@ -1,8 +1,11 @@
# source names mostly copied from [http://en.wikipedia.org/wiki/Pirated_movie_release_types]
pattern.video.source: CAMRip|CAM|TS|TELESYNC|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R5|R5LINE|DVDRip|DVDR|TVRip|DSR|PDTV|HDTV|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR
pattern.video.source: CAMRip|CAM|TS|TELESYNC|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R5|R5LINE|DVDRip|DVDR|TVRip|DSR|PDTV|HDTV|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR|WorkPrint|VHS|VCD
# additional release info patterns
pattern.codec: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|ac3|2ch|6ch|720p|1080p
pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|ac3|2ch|6ch|ws|hr|720p|1080p
# group names mostly copied from [http://scenelingo.wordpress.com/list-of-scene-release-groups]
url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt
# blacklisted terms that will be ignored
url.term-blacklist: http://filebot.sourceforge.net/data/term-blacklist.txt

View File

@ -1,7 +1,11 @@
0TV
1337x
1440
187HD
1920
2HD
2PaCaVeLi
2WIRE
3Li
4HM
aAF
@ -44,6 +48,7 @@ COALiTiON
CPtScene
CPY
CRF
CRIMSON
Crow
CSHD
CtrlHD
@ -70,7 +75,9 @@ DiMiTri
DiNA
DiR
disc
DiTa
DiVERSiTY
DivXNL
DivXNL-Team
DMT
DnB
@ -85,6 +92,7 @@ ETM
EUHD
EuReKA
ExtraTorrentRG
eztv
FHM
FLAiTE
fLAMEhd
@ -94,11 +102,13 @@ FmE
ForceBleue
FoV
FPG
FQM
FSiHD
Ft4U
FTVDT
Funner
FXG
FxM
GB
GEHENNA
GiNJi
@ -139,17 +149,21 @@ InSaNiTy
iNSECTS
iNSPiRED
iON
iTA
ITZ
Japhson
JAVLiU
k2
KaKa
keltz
KLAXXON
KOENiG
KRaLiMaRKo
KYR
Larceny
LEViTY
LiPAN
LMAO
LoD
LOL
LOLCATS
@ -177,6 +191,7 @@ NhaNc3
NiF
Nile
NiX
NL.Subs
Noir
NOsegmenT
NoTV
@ -185,6 +200,7 @@ NSUBS
NWO
NyTT
OAS
Omifast
ONYX
ORC
ORENJi
@ -245,8 +261,10 @@ SLO
Sneak
SoCkS
SoW
STV
SUNSPOT
SVD
Swesub
SYS
TELEFLiX
TERRA
@ -259,6 +277,7 @@ TN
TOKUS
trentalent
TruCK
TRUEFRENCH
TVA
TX
ULTiMATE
@ -266,15 +285,18 @@ UMF
USELESS
VanRay
VCDVaULT
ViCiOsO
ViNYL
ViSiON
ViSTA
VLiS
VOA
VoMiT
VOSTFR
VoX
VoXHD
w0rm
w4f
WANKAZ
WHATELSE
WHiiZz
@ -285,6 +307,9 @@ WPi
WuSiWuG
XiA
XOR
XOXO
xRG
xRipp
XSHD
XTM
XTSF

View File

@ -0,0 +1,10 @@
CD[1-3]
Demonoid
ExtraScene
ExtraTorrent
PROPER
READNFO
REPACK
RETAIL
ShareReactor
ShareZONE