From b2fbba3a2dc16d60814a54f35371f42e68d6bd2a Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 9 Feb 2012 13:50:14 +0000 Subject: [PATCH] * improved auto-detection for date-based episodes --- .../filebot/cli/CmdlineOperations.java | 2 +- .../filebot/similarity/DateMatcher.java | 98 +++++++++++++++++++ .../filebot/similarity/DateMetric.java | 62 ++---------- .../similarity/SeasonEpisodeMatcher.java | 2 +- .../filebot/similarity/SeriesNameMatcher.java | 33 +++++-- .../filebot/ui/rename/EpisodeListMatcher.java | 2 +- .../similarity/SeriesNameMatcherTest.java | 4 +- 7 files changed, 137 insertions(+), 66 deletions(-) create mode 100644 source/net/sourceforge/filebot/similarity/DateMatcher.java diff --git a/source/net/sourceforge/filebot/cli/CmdlineOperations.java b/source/net/sourceforge/filebot/cli/CmdlineOperations.java index e4021de6..fa043c2e 100644 --- a/source/net/sourceforge/filebot/cli/CmdlineOperations.java +++ b/source/net/sourceforge/filebot/cli/CmdlineOperations.java @@ -106,7 +106,7 @@ public class CmdlineOperations implements CmdlineInterface { for (File f : mediaFiles) { // count SxE matches - if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) { + if (nameMatcher.matchByEpisodeIdentifier(f.getName()) != null) { sxe++; } diff --git a/source/net/sourceforge/filebot/similarity/DateMatcher.java b/source/net/sourceforge/filebot/similarity/DateMatcher.java new file mode 100644 index 00000000..3f215018 --- /dev/null +++ b/source/net/sourceforge/filebot/similarity/DateMatcher.java @@ -0,0 +1,98 @@ + +package net.sourceforge.filebot.similarity; + + +import java.util.regex.MatchResult; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import net.sourceforge.filebot.web.Date; + + +public class DateMatcher { + + private final DatePattern[] patterns; + + + public DateMatcher() { + patterns = new DatePattern[2]; + + // match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc. + patterns[0] = new DatePattern("(?= 0) { + return pos; + } + } + + return -1; + } + + + private static class DatePattern { + + protected final Pattern pattern; + protected final int[] order; + + + public DatePattern(String pattern, int[] order) { + this.pattern = Pattern.compile(pattern); + this.order = order; + } + + + protected Date process(MatchResult match) { + return new Date(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2]))); + } + + + public Date match(CharSequence seq) { + Matcher matcher = pattern.matcher(seq); + + if (matcher.find()) { + return process(matcher); + } + + return null; + } + + + public int find(CharSequence seq, int fromIndex) { + Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length()); + + if (matcher.find()) { + return matcher.start(); + } + + return -1; + } + } + +} diff --git a/source/net/sourceforge/filebot/similarity/DateMetric.java b/source/net/sourceforge/filebot/similarity/DateMetric.java index f58ac212..68f8e6ed 100644 --- a/source/net/sourceforge/filebot/similarity/DateMetric.java +++ b/source/net/sourceforge/filebot/similarity/DateMetric.java @@ -3,26 +3,22 @@ package net.sourceforge.filebot.similarity; import java.io.File; -import java.util.regex.MatchResult; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import net.sourceforge.filebot.web.Date; public class DateMetric implements SimilarityMetric { - private final DatePattern[] patterns; + private final DateMatcher matcher; public DateMetric() { - patterns = new DatePattern[2]; - - // match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc. - patterns[0] = new DatePattern("(?= 0) { - focus[i] = focus[i].substring(0, pos); + int sxePos = seasonEpisodeMatcher.find(focus[i], 0); + if (sxePos >= 0) { + focus[i] = focus[i].substring(0, sxePos); + } else { + int datePos = dateMatcher.find(focus[i], 0); + if (datePos >= 0) { + focus[i] = focus[i].substring(0, datePos); + } } } whitelist.addAll(deepMatchAll(focus, threshold)); @@ -118,8 +124,9 @@ public class SeriesNameMatcher { name = normalize(name); Matcher prefix = prefixPattern.matcher(name); - int sxePosition = seasonEpisodeMatcher.find(name, prefix.find() ? prefix.end() : 0); + int prefixEnd = prefix.find() ? prefix.end() : 0; + int sxePosition = seasonEpisodeMatcher.find(name, prefixEnd); if (sxePosition > 0) { String hit = name.substring(0, sxePosition).trim(); List sxe = seasonEpisodeMatcher.match(name.substring(sxePosition)); @@ -131,7 +138,14 @@ public class SeriesNameMatcher { // require multiple matches, if hit might be a false match thresholdCollection.add(hit); } + } else { + // try date pattern as fallback + int datePosition = dateMatcher.find(name, prefixEnd); + if (datePosition > 0) { + thresholdCollection.addDirect(name.substring(0, datePosition).trim()); + } } + } return thresholdCollection; @@ -176,14 +190,19 @@ public class SeriesNameMatcher { * @return a substring of the given name that ends before the first occurrence of a season * episode pattern, or null if there is no such pattern */ - public String matchBySeasonEpisodePattern(String name) { + public String matchByEpisodeIdentifier(String name) { int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0); - if (seasonEpisodePosition > 0) { // series name ends at the first season episode pattern return normalize(name.substring(0, seasonEpisodePosition)); } + int datePosition = dateMatcher.find(name, 0); + if (datePosition > 0) { + // series name ends at the first season episode pattern + return normalize(name.substring(0, datePosition)); + } + return null; } diff --git a/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java b/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java index aa722e15..f1965f1a 100644 --- a/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java +++ b/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java @@ -233,7 +233,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher { // require user input if auto-detection has failed or has been disabled if (episodes.isEmpty()) { - String suggestion = new SeriesNameMatcher().matchBySeasonEpisodePattern(getName(files.get(0))); + String suggestion = new SeriesNameMatcher().matchByEpisodeIdentifier(getName(files.get(0))); if (suggestion != null) { // clean media info / release group info / etc suggestion = stripReleaseInfo(suggestion); diff --git a/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java b/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java index b44eb46b..6e791de5 100644 --- a/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java +++ b/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java @@ -34,10 +34,10 @@ public class SeriesNameMatcherTest { @Test public void matchBeforeSeasonEpisodePattern() { - assertEquals("The Test", matcher.matchBySeasonEpisodePattern("The Test - 1x01")); + assertEquals("The Test", matcher.matchByEpisodeIdentifier("The Test - 1x01")); // real world test - assertEquals("Mushishi", matcher.matchBySeasonEpisodePattern("[niizk]_Mushishi_-_01_-_The_Green_Gathering")); + assertEquals("Mushishi", matcher.matchByEpisodeIdentifier("[niizk]_Mushishi_-_01_-_The_Green_Gathering")); }