From 3f9c0ab67e90d6bd3d2ac46918511b9273df2783 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 26 Jul 2012 08:45:15 +0000 Subject: [PATCH] * switch to using an online exclude pattern list that can be updated anytime for everybody --- BuildData.groovy | 1 + source/net/sourceforge/filebot/Main.java | 10 +++-- .../filebot/cli/CmdlineOperations.java | 2 +- .../filebot/media/MediaDetection.java | 28 ++++++++++--- .../filebot/media/ReleaseInfo.java | 41 ++++++++++--------- .../filebot/media/ReleaseInfo.properties | 4 +- .../filebot/similarity/SeriesNameMatcher.java | 2 +- .../filebot/ui/rename/MovieHashMatcher.java | 2 +- .../ui/subtitle/SubtitleAutoMatchDialog.java | 5 +-- website/data/exclude-blacklist.txt | 8 ++++ 10 files changed, 66 insertions(+), 37 deletions(-) create mode 100644 website/data/exclude-blacklist.txt diff --git a/BuildData.groovy b/BuildData.groovy index af9dbe18..430bc034 100644 --- a/BuildData.groovy +++ b/BuildData.groovy @@ -16,6 +16,7 @@ def sortRegexList(path) { // sort and check shared regex collections sortRegexList("website/data/release-groups.txt") sortRegexList("website/data/query-blacklist.txt") +sortRegexList("website/data/exclude-blacklist.txt") // ------------------------------------------------------------------------- // diff --git a/source/net/sourceforge/filebot/Main.java b/source/net/sourceforge/filebot/Main.java index 6347043d..d39cb000 100644 --- a/source/net/sourceforge/filebot/Main.java +++ b/source/net/sourceforge/filebot/Main.java @@ -38,9 +38,6 @@ import javax.swing.SwingUtilities; import javax.swing.UIManager; import javax.xml.parsers.DocumentBuilderFactory; -import org.kohsuke.args4j.CmdLineException; -import org.w3c.dom.NodeList; - import net.miginfocom.swing.MigLayout; import net.sf.ehcache.CacheManager; import net.sourceforge.filebot.cli.ArgumentBean; @@ -56,6 +53,9 @@ import net.sourceforge.filebot.web.CachedResource; import net.sourceforge.tuned.ByteBufferInputStream; import net.sourceforge.tuned.PreferencesMap.PreferencesEntry; +import org.kohsuke.args4j.CmdLineException; +import org.w3c.dom.NodeList; + public class Main { @@ -264,6 +264,10 @@ public class Main { @Override public void run() { try { + // pre-load filter data + MediaDetection.getClutterFileFilter(); + MediaDetection.getDiskFolderFilter(); + // pre-load movie/series index List dummy = Collections.singletonList(""); MediaDetection.stripReleaseInfo(dummy, true); diff --git a/source/net/sourceforge/filebot/cli/CmdlineOperations.java b/source/net/sourceforge/filebot/cli/CmdlineOperations.java index b48a0e68..104bf51f 100644 --- a/source/net/sourceforge/filebot/cli/CmdlineOperations.java +++ b/source/net/sourceforge/filebot/cli/CmdlineOperations.java @@ -292,7 +292,7 @@ public class CmdlineOperations implements CmdlineInterface { CLILogger.config(format("Rename movies using [%s]", service.getName())); // ignore sample files - List fileset = filter(files, NON_CLUTTER_FILES); + List fileset = filter(files, not(getClutterFileFilter())); // handle movie files Set movieFiles = new TreeSet(filter(fileset, VIDEO_FILES)); diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index 076eaba6..02303ec0 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -61,17 +61,33 @@ public class MediaDetection { public static final ReleaseInfo releaseInfo = new ReleaseInfo(); - public static final FileFilter DISK_FOLDERS = releaseInfo.getDiskFolderFilter(); - public static final FileFilter NON_CLUTTER_FILES = not(releaseInfo.getClutterFileFilter()); + private static FileFilter diskFolder; + private static FileFilter clutterFile; - public static boolean isDiskFolder(File folder) { - return DISK_FOLDERS.accept(folder); + public static FileFilter getDiskFolderFilter() { + if (diskFolder == null) { + diskFolder = releaseInfo.getDiskFolderFilter(); + } + return diskFolder; } - public static boolean isNonClutter(File file) { - return NON_CLUTTER_FILES.accept(file); + public static FileFilter getClutterFileFilter() throws IOException { + if (clutterFile == null) { + clutterFile = releaseInfo.getClutterFileFilter(); + } + return clutterFile; + } + + + public static boolean isDiskFolder(File folder) { + return getDiskFolderFilter().accept(folder); + } + + + public static boolean isClutterFile(File file) throws IOException { + return getClutterFileFilter().accept(file); } diff --git a/source/net/sourceforge/filebot/media/ReleaseInfo.java b/source/net/sourceforge/filebot/media/ReleaseInfo.java index 810d0522..71435cc7 100644 --- a/source/net/sourceforge/filebot/media/ReleaseInfo.java +++ b/source/net/sourceforge/filebot/media/ReleaseInfo.java @@ -22,6 +22,7 @@ import java.text.Normalizer.Form; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Locale; @@ -102,10 +103,8 @@ public class ReleaseInfo { } // cached patterns - private Pattern[] strict_stopwords; - private Pattern[] strict_blacklist; - private Pattern[] nonstrict_stopwords; - private Pattern[] nonstrict_blacklist; + private final Map stopwords = new HashMap(2); + private final Map blacklist = new HashMap(2); public List cleanRelease(Collection items, boolean strict) throws IOException { @@ -113,9 +112,9 @@ public class ReleaseInfo { Pattern[] blacklist; // initialize cached patterns - synchronized (this) { - stopwords = strict ? strict_stopwords : nonstrict_stopwords; - blacklist = strict ? strict_blacklist : nonstrict_blacklist; + synchronized (this.stopwords) { + stopwords = this.stopwords.get(strict); + blacklist = this.blacklist.get(strict); if (stopwords == null || blacklist == null) { Set languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet(); @@ -132,13 +131,8 @@ public class ReleaseInfo { blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist }; // cache compiled patterns for common usage - if (strict) { - strict_stopwords = stopwords; - strict_blacklist = blacklist; - } else { - nonstrict_stopwords = stopwords; - nonstrict_blacklist = blacklist; - } + this.stopwords.put(strict, stopwords); + this.blacklist.put(strict, blacklist); } } @@ -182,13 +176,13 @@ public class ReleaseInfo { public Pattern getLanguageTagPattern(Collection languages) { // [en] - return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ); + return compile("(?<=[-\\[{(])(" + join(quoteAll(languages), "|") + ")(?=\\p{Punct})", CASE_INSENSITIVE | UNICODE_CASE); } public Pattern getLanguageSuffixPattern(Collection languages) { // .en.srt - return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE | CANON_EQ); + return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE); } @@ -221,13 +215,19 @@ public class ReleaseInfo { public Pattern getReleaseGroupPattern(boolean strict) throws IOException { // pattern matching any release group name enclosed in separators - return compile("(? releaseGroupResource = new PatternResource(getBundle(getClass().getName()).getString("url.release-groups")); protected final CachedResource queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist")); + protected final CachedResource excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist")); protected final CachedResource movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list")); protected final CachedResource seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list")); diff --git a/source/net/sourceforge/filebot/media/ReleaseInfo.properties b/source/net/sourceforge/filebot/media/ReleaseInfo.properties index 169e5778..dbc0085b 100644 --- a/source/net/sourceforge/filebot/media/ReleaseInfo.properties +++ b/source/net/sourceforge/filebot/media/ReleaseInfo.properties @@ -10,6 +10,9 @@ url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt # blacklisted terms that will be ignored url.query-blacklist: http://filebot.sourceforge.net/data/query-blacklist.txt +# clutter files that will be ignored +url.exclude-blacklist: http://filebot.sourceforge.net/data/exclude-blacklist.txt + # list of all movies (id, name, year) url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz @@ -18,4 +21,3 @@ url.series-list: http://filebot.sourceforge.net/data/series.list.gz # disk folder matcher pattern.diskfolder.entry: ^BDMV$|^HVDVD_TS$|^VIDEO_TS$|^AUDIO_TS$|^VCD$ -pattern.file.ignore: (?> match(final List files, final SortOrder sortOrder, final Locale locale, final boolean autodetect, final Component parent) throws Exception { // ignore sample files - List fileset = filter(files, NON_CLUTTER_FILES); + List fileset = filter(files, not(getClutterFileFilter())); // handle movie files Set movieFiles = new TreeSet(filter(fileset, VIDEO_FILES)); diff --git a/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java b/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java index c4a6bb77..44c341ca 100644 --- a/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java +++ b/source/net/sourceforge/filebot/ui/subtitle/SubtitleAutoMatchDialog.java @@ -190,7 +190,6 @@ class SubtitleAutoMatchDialog extends JDialog { servicePanel.add(component); } - // remember last user input private List userQuery = new ArrayList(); @@ -263,7 +262,6 @@ class SubtitleAutoMatchDialog extends JDialog { return null; } - private final Action downloadAction = new AbstractAction("Download", ResourceManager.getIcon("dialog.continue")) { @Override @@ -644,7 +642,6 @@ class SubtitleAutoMatchDialog extends JDialog { } } - private final PropertyChangeListener selectedOptionListener = new PropertyChangeListener() { @Override @@ -969,7 +966,7 @@ class SubtitleAutoMatchDialog extends JDialog { @Override protected Map> getSubtitleList(Collection files, String languageName, Component parent) throws Exception { // ignore clutter files from processing - files = filter(files, NON_CLUTTER_FILES); + files = filter(files, not(getClutterFileFilter())); // auto-detect query and search for subtitles Collection querySet = new TreeSet(String.CASE_INSENSITIVE_ORDER); diff --git a/website/data/exclude-blacklist.txt b/website/data/exclude-blacklist.txt new file mode 100644 index 00000000..0bb4bb3f --- /dev/null +++ b/website/data/exclude-blacklist.txt @@ -0,0 +1,8 @@ +!(sample|trailer) +(?