* switch to using an online exclude pattern list that can be updated anytime for everybody

This commit is contained in:
Reinhard Pointner 2012-07-26 08:45:15 +00:00
parent e3fde5f139
commit 3f9c0ab67e
10 changed files with 66 additions and 37 deletions

View File

@ -16,6 +16,7 @@ def sortRegexList(path) {
// sort and check shared regex collections
sortRegexList("website/data/release-groups.txt")
sortRegexList("website/data/query-blacklist.txt")
sortRegexList("website/data/exclude-blacklist.txt")
// ------------------------------------------------------------------------- //

View File

@ -38,9 +38,6 @@ import javax.swing.SwingUtilities;
import javax.swing.UIManager;
import javax.xml.parsers.DocumentBuilderFactory;
import org.kohsuke.args4j.CmdLineException;
import org.w3c.dom.NodeList;
import net.miginfocom.swing.MigLayout;
import net.sf.ehcache.CacheManager;
import net.sourceforge.filebot.cli.ArgumentBean;
@ -56,6 +53,9 @@ import net.sourceforge.filebot.web.CachedResource;
import net.sourceforge.tuned.ByteBufferInputStream;
import net.sourceforge.tuned.PreferencesMap.PreferencesEntry;
import org.kohsuke.args4j.CmdLineException;
import org.w3c.dom.NodeList;
public class Main {
@ -264,6 +264,10 @@ public class Main {
@Override
public void run() {
try {
// pre-load filter data
MediaDetection.getClutterFileFilter();
MediaDetection.getDiskFolderFilter();
// pre-load movie/series index
List<String> dummy = Collections.singletonList("");
MediaDetection.stripReleaseInfo(dummy, true);

View File

@ -292,7 +292,7 @@ public class CmdlineOperations implements CmdlineInterface {
CLILogger.config(format("Rename movies using [%s]", service.getName()));
// ignore sample files
List<File> fileset = filter(files, NON_CLUTTER_FILES);
List<File> fileset = filter(files, not(getClutterFileFilter()));
// handle movie files
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));

View File

@ -61,17 +61,33 @@ public class MediaDetection {
public static final ReleaseInfo releaseInfo = new ReleaseInfo();
public static final FileFilter DISK_FOLDERS = releaseInfo.getDiskFolderFilter();
public static final FileFilter NON_CLUTTER_FILES = not(releaseInfo.getClutterFileFilter());
private static FileFilter diskFolder;
private static FileFilter clutterFile;
public static boolean isDiskFolder(File folder) {
return DISK_FOLDERS.accept(folder);
/**
 * Return the shared disk folder filter, creating it on the first call.
 *
 * @return the filter obtained from {@code releaseInfo.getDiskFolderFilter()}; the same
 *         instance is handed out on subsequent calls
 */
public static FileFilter getDiskFolderFilter() {
	// reuse the filter remembered by an earlier call
	if (diskFolder != null) {
		return diskFolder;
	}

	// first call: build the filter once and keep it for later
	diskFolder = releaseInfo.getDiskFolderFilter();
	return diskFolder;
}
public static boolean isNonClutter(File file) {
return NON_CLUTTER_FILES.accept(file);
/**
 * Return the shared clutter file filter, creating it on the first call.
 *
 * @return the filter obtained from {@code releaseInfo.getClutterFileFilter()}; the same
 *         instance is handed out on subsequent calls
 * @throws IOException if creating the filter fails; nothing is cached in that case
 */
public static FileFilter getClutterFileFilter() throws IOException {
	// reuse the filter remembered by an earlier call
	if (clutterFile != null) {
		return clutterFile;
	}

	// first call: build the filter once and keep it for later
	clutterFile = releaseInfo.getClutterFileFilter();
	return clutterFile;
}
/**
 * Test the given folder against the shared disk folder filter.
 *
 * @param folder the folder to check
 * @return {@code true} if the disk folder filter accepts the folder
 */
public static boolean isDiskFolder(File folder) {
	FileFilter filter = getDiskFolderFilter();
	return filter.accept(folder);
}
/**
 * Test the given file against the shared clutter file filter.
 *
 * @param file the file to check
 * @return {@code true} if the clutter file filter accepts the file
 * @throws IOException if the clutter file filter cannot be created
 */
public static boolean isClutterFile(File file) throws IOException {
	FileFilter filter = getClutterFileFilter();
	return filter.accept(file);
}

View File

@ -22,6 +22,7 @@ import java.text.Normalizer.Form;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
@ -102,10 +103,8 @@ public class ReleaseInfo {
}
// cached patterns
private Pattern[] strict_stopwords;
private Pattern[] strict_blacklist;
private Pattern[] nonstrict_stopwords;
private Pattern[] nonstrict_blacklist;
private final Map<Boolean, Pattern[]> stopwords = new HashMap<Boolean, Pattern[]>(2);
private final Map<Boolean, Pattern[]> blacklist = new HashMap<Boolean, Pattern[]>(2);
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
@ -113,9 +112,9 @@ public class ReleaseInfo {
Pattern[] blacklist;
// initialize cached patterns
synchronized (this) {
stopwords = strict ? strict_stopwords : nonstrict_stopwords;
blacklist = strict ? strict_blacklist : nonstrict_blacklist;
synchronized (this.stopwords) {
stopwords = this.stopwords.get(strict);
blacklist = this.blacklist.get(strict);
if (stopwords == null || blacklist == null) {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
@ -132,13 +131,8 @@ public class ReleaseInfo {
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
// cache compiled patterns for common usage
if (strict) {
strict_stopwords = stopwords;
strict_blacklist = blacklist;
} else {
nonstrict_stopwords = stopwords;
nonstrict_blacklist = blacklist;
}
this.stopwords.put(strict, stopwords);
this.blacklist.put(strict, blacklist);
}
}
@ -182,13 +176,13 @@ public class ReleaseInfo {
/**
 * Build a pattern that matches any of the given language terms when the term directly
 * follows one of the characters {@code - [ { (} and is directly followed by a
 * punctuation character. The terms are passed through {@code quoteAll} and joined as
 * alternatives; matching is case-insensitive.
 *
 * @param languages language terms to look for
 * @return the compiled pattern; the matched term is capture group 1
 */
public Pattern getLanguageTagPattern(Collection<String> languages) {
// e.g. [en]
return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE);
}
/**
 * Build a pattern that matches any of the given language terms when the term is
 * preceded by a punctuation or whitespace character and followed by nothing but
 * {@code .}, {@code _} or space characters up to the end of the input. The terms are
 * passed through {@code quoteAll} and joined as alternatives; matching is
 * case-insensitive.
 *
 * @param languages language terms to look for
 * @return the compiled pattern; the matched term is capture group 1
 */
public Pattern getLanguageSuffixPattern(Collection<String> languages) {
// e.g. .en.srt
return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE);
}
@ -221,13 +215,19 @@ public class ReleaseInfo {
/**
 * Build a pattern from the entries of the release group resource, joined as
 * alternatives. A match must not be directly preceded or followed by an alphanumeric
 * character.
 *
 * @param strict if {@code true} the pattern is compiled without flags (case-sensitive),
 *               otherwise it is compiled case-insensitively
 * @return the compiled pattern; the matched entry is capture group 1
 * @throws IOException declared for the release group resource lookup
 */
public Pattern getReleaseGroupPattern(boolean strict) throws IOException {
// pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
return compile("(?<!\\p{Alnum})(" + join(releaseGroupResource.get(), "|") + ")(?!\\p{Alnum})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CASE);
}
/**
 * Build a case-insensitive pattern from the entries of the query blacklist resource,
 * joined as alternatives. A match must not be directly preceded or followed by an
 * alphanumeric character.
 *
 * @return the compiled pattern; the matched entry is capture group 1
 * @throws IOException declared for the query blacklist resource lookup
 */
public Pattern getBlacklistPattern() throws IOException {
// pattern matching any blacklisted query term enclosed in separators
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ);
return compile("(?<!\\p{Alnum})(" + join(queryBlacklistResource.get(), "|") + ")(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CASE);
}
/**
 * Compile the entries of the exclude blacklist resource into a single case-insensitive
 * pattern. Each entry is used as a regular expression as-is and the entries are
 * combined as alternatives.
 *
 * @return pattern matching any of the exclude expressions, or a pattern that never
 *         matches if the exclude list has no entries
 * @throws IOException declared for the exclude blacklist resource lookup
 */
public Pattern getExcludePattern() throws IOException {
	String[] excludes = excludeBlacklistResource.get();

	// an empty regex finds a match in every input, so an empty list must not be turned
	// into a pattern that flags everything; use an always-failing lookahead instead
	if (excludes.length == 0) {
		return compile("(?!)", CASE_INSENSITIVE | UNICODE_CASE);
	}

	// pattern matching any of the exclude expressions (no separators are added here)
	return compile(join(excludes, "|"), CASE_INSENSITIVE | UNICODE_CASE);
}
@ -246,13 +246,14 @@ public class ReleaseInfo {
}
public FileFilter getClutterFileFilter() {
return new FileFolderNameFilter(compile(getBundle(getClass().getName()).getString("pattern.file.ignore")));
/**
 * Create a filter for clutter files based on the exclude pattern.
 *
 * @return a new {@code FileFolderNameFilter} wrapping {@link #getExcludePattern()}
 * @throws IOException if the exclude pattern cannot be obtained
 */
public FileFilter getClutterFileFilter() throws IOException {
	Pattern excludePattern = getExcludePattern();
	return new FileFolderNameFilter(excludePattern);
}
// fetch release group names online and try to update the data every other day
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups"));
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
protected final CachedResource<String[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list"));

View File

@ -10,6 +10,9 @@ url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt
# blacklisted terms that will be ignored
url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt
# clutter files that will be ignored
url.exclude-blacklist: http://filebot.sourceforge.net/data/exclude-blacklist.txt
# list of all movies (id, name, year)
url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz
@ -18,4 +21,3 @@ url.series-list: http://filebot.sourceforge.net/data/series.list.gz
# disk folder matcher
pattern.diskfolder.entry: ^BDMV$|^HVDVD_TS$|^VIDEO_TS$|^AUDIO_TS$|^VCD$
pattern.file.ignore: (?<!\\p{Alnum})(?i:sample|trailer|extras|deleted.scenes)(?!\\p{Alnum})

View File

@ -102,7 +102,7 @@ public class SeriesNameMatcher {
whitelist.addAll(deepMatchAll(focus, threshold));
// 1. use pattern matching
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ), threshold, false));
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE), threshold, false));
// 2. use common word sequences
seriesNames.addAll(whitelist);

View File

@ -70,7 +70,7 @@ class MovieHashMatcher implements AutoCompleteMatcher {
@Override
public List<Match<File, ?>> match(final List<File> files, final SortOrder sortOrder, final Locale locale, final boolean autodetect, final Component parent) throws Exception {
// ignore sample files
List<File> fileset = filter(files, NON_CLUTTER_FILES);
List<File> fileset = filter(files, not(getClutterFileFilter()));
// handle movie files
Set<File> movieFiles = new TreeSet<File>(filter(fileset, VIDEO_FILES));

View File

@ -190,7 +190,6 @@ class SubtitleAutoMatchDialog extends JDialog {
servicePanel.add(component);
}
// remember last user input
private List<String> userQuery = new ArrayList<String>();
@ -263,7 +262,6 @@ class SubtitleAutoMatchDialog extends JDialog {
return null;
}
private final Action downloadAction = new AbstractAction("Download", ResourceManager.getIcon("dialog.continue")) {
@Override
@ -644,7 +642,6 @@ class SubtitleAutoMatchDialog extends JDialog {
}
}
private final PropertyChangeListener selectedOptionListener = new PropertyChangeListener() {
@Override
@ -969,7 +966,7 @@ class SubtitleAutoMatchDialog extends JDialog {
@Override
protected Map<File, List<SubtitleDescriptor>> getSubtitleList(Collection<File> files, String languageName, Component parent) throws Exception {
// ignore clutter files from processing
files = filter(files, NON_CLUTTER_FILES);
files = filter(files, not(getClutterFileFilter()));
// auto-detect query and search for subtitles
Collection<String> querySet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);

View File

@ -0,0 +1,8 @@
!(sample|trailer)
(?<!\p{Alnum})(extras|deleted.scenes)(?!\p{Alnum})
(sample|trailer)-
(sample|trailer)[.]
-(sample|trailer)
\((sample|trailer)\)
\[(sample|trailer)\]
^(sample|trailer)