From 30993a5cb63708429e412fca5eacc7b6bd62844b Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Mon, 14 Nov 2011 11:43:22 +0000 Subject: [PATCH] * clean release info before submitting queries --- .../filebot/format/MediaBindingBean.java | 47 +------- .../filebot/mediainfo/ReleaseInfo.java | 106 ++++++++++++++++++ .../ReleaseInfo.properties} | 5 +- .../filebot/ui/rename/MovieHashMatcher.java | 19 ++-- .../sourceforge/filebot/FileBotTestSuite.java | 3 +- .../filebot/mediainfo/ReleaseInfoTest.java | 31 +++++ 6 files changed, 157 insertions(+), 54 deletions(-) create mode 100644 source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java rename source/net/sourceforge/filebot/{format/MediaBindingBean.properties => mediainfo/ReleaseInfo.properties} (78%) create mode 100644 test/net/sourceforge/filebot/mediainfo/ReleaseInfoTest.java diff --git a/source/net/sourceforge/filebot/format/MediaBindingBean.java b/source/net/sourceforge/filebot/format/MediaBindingBean.java index c02485b9..96edbd03 100644 --- a/source/net/sourceforge/filebot/format/MediaBindingBean.java +++ b/source/net/sourceforge/filebot/format/MediaBindingBean.java @@ -2,28 +2,21 @@ package net.sourceforge.filebot.format; -import static java.util.Arrays.*; -import static java.util.ResourceBundle.*; -import static java.util.regex.Pattern.*; import static net.sourceforge.filebot.MediaTypes.*; import static net.sourceforge.filebot.format.Define.*; import static net.sourceforge.filebot.hash.VerificationUtilities.*; import java.io.File; import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.charset.Charset; import java.util.Scanner; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import net.sf.ehcache.Cache; import net.sf.ehcache.CacheManager; import net.sf.ehcache.Element; import net.sourceforge.filebot.hash.HashType; import net.sourceforge.filebot.mediainfo.MediaInfo; +import net.sourceforge.filebot.mediainfo.ReleaseInfo; import net.sourceforge.filebot.mediainfo.MediaInfo.StreamKind; -import net.sourceforge.filebot.web.CachedResource; import net.sourceforge.filebot.web.Date; import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Movie; @@ -249,21 +242,10 @@ public class MediaBindingBean { public String getVideoSource() { // use inferred media file File inferredMediaFile = getInferredMediaFile(); - - // pattern matching any video source name - Pattern source = compile(getBundle(getClass().getName()).getString("pattern.video.source"), CASE_INSENSITIVE); + ReleaseInfo releaseInfo = new ReleaseInfo(); // look for video source patterns in media file and it's parent folder - String lastMatch = null; - for (File it : asList(inferredMediaFile.getParentFile(), inferredMediaFile)) { - for (String part : it.getName().split("[^\\p{Alnum}]")) { - if (source.matcher(part).matches()) { - lastMatch = part; - } - } - } - - return lastMatch; + return releaseInfo.getVideoSource(inferredMediaFile); } @@ -271,19 +253,10 @@ public class MediaBindingBean { public String getReleaseGroup() throws IOException { // use inferred media file File inferredMediaFile = getInferredMediaFile(); - - // pattern matching any release group name enclosed in separators - Pattern groups = compile("(? releaseGroups = new CachedResource(getBundle(getClass().getName()).getString("url.release-groups"), 24 * 60 * 60 * 1000) { - - @Override - public String process(ByteBuffer data) { - return compile("\\s").matcher(Charset.forName("UTF-8").decode(data)).replaceAll("|"); - } - }; - } diff --git a/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java new file mode 100644 index 00000000..2867fadf --- /dev/null +++ b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.java @@ -0,0 +1,106 @@ + +package net.sourceforge.filebot.mediainfo; + + +import static java.util.ResourceBundle.*; +import static java.util.regex.Pattern.*; +import static net.sourceforge.tuned.StringUtilities.*; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import net.sourceforge.filebot.web.CachedResource; + + +public class ReleaseInfo { + + public String getVideoSource(File file) { + // check parent and itself for group names + return matchLast(getVideoSourcePattern(), file.getParent(), file.getName()); + } + + + public String getReleaseGroup(File file) throws IOException { + // check parent and itself for group names + return matchLast(getReleaseGroupPattern(), file.getParent(), file.getName()); + } + + + protected String matchLast(Pattern pattern, CharSequence... sequence) { + String lastMatch = null; + + for (CharSequence name : sequence) { + if (name == null) + continue; + + Matcher matcher = pattern.matcher(name); + while (matcher.find()) { + lastMatch = matcher.group(); + } + } + + return lastMatch; + } + + + public List clean(Iterable items) { + return clean(items, getVideoSourcePattern(), getCodecPattern()); + } + + + public List cleanRG(Iterable items) throws IOException { + return clean(items, getReleaseGroupPattern(), getVideoSourcePattern(), getCodecPattern()); + } + + + public List clean(Iterable items, Pattern... blacklisted) { + List cleaned = new ArrayList(); + + for (String string : items) { + for (Pattern it : blacklisted) { + string = it.matcher(string).replaceAll(""); + } + + cleaned.add(string.replaceAll("[\\p{Punct}\\p{Space}]+", " ").trim()); + } + + return cleaned; + } + + + public Pattern getCodecPattern() { + // pattern matching any video source name + String pattern = getBundle(getClass().getName()).getString("pattern.codec"); + return compile("(? releaseGroupResource = new CachedResource(getBundle(getClass().getName()).getString("url.release-groups"), 24 * 60 * 60 * 1000) { + + @Override + public String[] process(ByteBuffer data) { + return compile("\\s").split(Charset.forName("UTF-8").decode(data)); + } + }; + +} diff --git a/source/net/sourceforge/filebot/format/MediaBindingBean.properties b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.properties similarity index 78% rename from source/net/sourceforge/filebot/format/MediaBindingBean.properties rename to source/net/sourceforge/filebot/mediainfo/ReleaseInfo.properties index a96f60a3..ccd74eb2 100644 --- a/source/net/sourceforge/filebot/format/MediaBindingBean.properties +++ b/source/net/sourceforge/filebot/mediainfo/ReleaseInfo.properties @@ -1,5 +1,8 @@ # source names mostly copied from [http://en.wikipedia.org/wiki/Pirated_movie_release_types] pattern.video.source: CAMRip|CAM|TS|TELESYNC|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R5|R5LINE|DVDRip|DVDR|TVRip|DSR|PDTV|HDTV|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR +# additional release info patterns +pattern.codec: DivX|Xvid|AVC|x264|h264|3ivx|mpeg|mpeg4|mp3|aac|2ch|6ch|720p|1080p + # group names mostly copied from [http://scenelingo.wordpress.com/list-of-scene-release-groups] -url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt \ No newline at end of file +url.release-groups: http://filebot.sourceforge.net/data/release-groups.txt diff --git a/source/net/sourceforge/filebot/ui/rename/MovieHashMatcher.java b/source/net/sourceforge/filebot/ui/rename/MovieHashMatcher.java index 887c2836..8d14768c 100644 --- a/source/net/sourceforge/filebot/ui/rename/MovieHashMatcher.java +++ b/source/net/sourceforge/filebot/ui/rename/MovieHashMatcher.java @@ -13,11 +13,11 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -34,6 +34,7 @@ import javax.swing.Action; import javax.swing.SwingUtilities; import net.sourceforge.filebot.Analytics; +import net.sourceforge.filebot.mediainfo.ReleaseInfo; import net.sourceforge.filebot.similarity.Match; import net.sourceforge.filebot.ui.SelectDialog; import net.sourceforge.filebot.web.Movie; @@ -160,11 +161,6 @@ class MovieHashMatcher implements AutoCompleteMatcher { } - private String normalizeMovieName(File movie) { - return getName(movie).replaceAll("\\p{Punct}+", " ").trim(); - } - - protected Movie grabMovieName(File movieFile, Locale locale, boolean autodetect, Movie... suggestions) throws Exception { List options = new ArrayList(); @@ -185,9 +181,12 @@ class MovieHashMatcher implements AutoCompleteMatcher { } // search by file name or folder name - Set searchQueries = new LinkedHashSet(2); - searchQueries.add(normalizeMovieName(movieFile)); - searchQueries.add(normalizeMovieName(movieFile.getParentFile())); + Collection searchQueries = new TreeSet(String.CASE_INSENSITIVE_ORDER); + searchQueries.add(getName(movieFile)); + searchQueries.add(getName(movieFile.getParentFile())); + + // remove blacklisted terms + searchQueries = new ReleaseInfo().cleanRG(searchQueries); for (String query : searchQueries) { if (autodetect && options.isEmpty()) { @@ -197,7 +196,7 @@ class MovieHashMatcher implements AutoCompleteMatcher { // allow manual user input if (options.isEmpty() || !autodetect) { - String suggestion = options.isEmpty() ? normalizeMovieName(movieFile) : options.get(0).getName(); + String suggestion = options.isEmpty() ? searchQueries.iterator().next() : options.get(0).getName(); String input = showInputDialog(null, "Enter movie name:", suggestion); if (input != null) { diff --git a/test/net/sourceforge/filebot/FileBotTestSuite.java b/test/net/sourceforge/filebot/FileBotTestSuite.java index 681e7d3f..951fe68f 100644 --- a/test/net/sourceforge/filebot/FileBotTestSuite.java +++ b/test/net/sourceforge/filebot/FileBotTestSuite.java @@ -8,6 +8,7 @@ import org.junit.runners.Suite.SuiteClasses; import net.sourceforge.filebot.format.ExpressionFormatTest; import net.sourceforge.filebot.hash.VerificationFormatTest; +import net.sourceforge.filebot.mediainfo.ReleaseInfoTest; import net.sourceforge.filebot.similarity.SimilarityTestSuite; import net.sourceforge.filebot.subtitle.SubtitleReaderTestSuite; import net.sourceforge.filebot.ui.rename.MatchModelTest; @@ -16,7 +17,7 @@ import net.sourceforge.filebot.web.WebTestSuite; @RunWith(Suite.class) -@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, MatchSimilarityMetricTest.class, SubtitleReaderTestSuite.class }) +@SuiteClasses( { SimilarityTestSuite.class, WebTestSuite.class, ExpressionFormatTest.class, VerificationFormatTest.class, MatchModelTest.class, MatchSimilarityMetricTest.class, SubtitleReaderTestSuite.class, ReleaseInfoTest.class }) public class FileBotTestSuite { } diff --git a/test/net/sourceforge/filebot/mediainfo/ReleaseInfoTest.java b/test/net/sourceforge/filebot/mediainfo/ReleaseInfoTest.java new file mode 100644 index 00000000..02d962f9 --- /dev/null +++ b/test/net/sourceforge/filebot/mediainfo/ReleaseInfoTest.java @@ -0,0 +1,31 @@ + +package net.sourceforge.filebot.mediainfo; + + +import static org.junit.Assert.*; + +import java.io.File; + +import org.junit.Test; + + +public class ReleaseInfoTest { + + @Test + public void getVideoSource() { + ReleaseInfo info = new ReleaseInfo(); + File f = new File("Jurassic.Park[1993]DvDrip-aXXo.avi"); + + assertEquals("DvDrip", info.getVideoSource(f)); + } + + + @Test + public void getReleaseGroup() throws Exception { + ReleaseInfo info = new ReleaseInfo(); + File f = new File("Jurassic.Park[1993]DvDrip-aXXo.avi"); + + assertEquals("aXXo", info.getReleaseGroup(f)); + } + +}