mirror of
https://github.com/mitb-archive/filebot
synced 2024-12-24 08:48:51 -05:00
* improved auto-detection for date-based episodes
This commit is contained in:
parent
517fa36038
commit
b2fbba3a2d
@ -106,7 +106,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
||||
|
||||
for (File f : mediaFiles) {
|
||||
// count SxE matches
|
||||
if (nameMatcher.matchBySeasonEpisodePattern(f.getName()) != null) {
|
||||
if (nameMatcher.matchByEpisodeIdentifier(f.getName()) != null) {
|
||||
sxe++;
|
||||
}
|
||||
|
||||
|
98
source/net/sourceforge/filebot/similarity/DateMatcher.java
Normal file
98
source/net/sourceforge/filebot/similarity/DateMatcher.java
Normal file
@ -0,0 +1,98 @@
|
||||
|
||||
package net.sourceforge.filebot.similarity;
|
||||
|
||||
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.sourceforge.filebot.web.Date;
|
||||
|
||||
|
||||
public class DateMatcher {
|
||||
|
||||
private final DatePattern[] patterns;
|
||||
|
||||
|
||||
public DateMatcher() {
|
||||
patterns = new DatePattern[2];
|
||||
|
||||
// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc.
|
||||
patterns[0] = new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 });
|
||||
|
||||
// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc.
|
||||
patterns[1] = new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 });
|
||||
}
|
||||
|
||||
|
||||
public DateMatcher(DatePattern... patterns) {
|
||||
this.patterns = patterns;
|
||||
}
|
||||
|
||||
|
||||
public Date match(CharSequence seq) {
|
||||
for (DatePattern pattern : patterns) {
|
||||
Date match = pattern.match(seq);
|
||||
|
||||
if (match != null) {
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
public int find(CharSequence seq, int fromIndex) {
|
||||
for (DatePattern pattern : patterns) {
|
||||
int pos = pattern.find(seq, fromIndex);
|
||||
|
||||
if (pos >= 0) {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
private static class DatePattern {
|
||||
|
||||
protected final Pattern pattern;
|
||||
protected final int[] order;
|
||||
|
||||
|
||||
public DatePattern(String pattern, int[] order) {
|
||||
this.pattern = Pattern.compile(pattern);
|
||||
this.order = order;
|
||||
}
|
||||
|
||||
|
||||
protected Date process(MatchResult match) {
|
||||
return new Date(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2])));
|
||||
}
|
||||
|
||||
|
||||
public Date match(CharSequence seq) {
|
||||
Matcher matcher = pattern.matcher(seq);
|
||||
|
||||
if (matcher.find()) {
|
||||
return process(matcher);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
public int find(CharSequence seq, int fromIndex) {
|
||||
Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length());
|
||||
|
||||
if (matcher.find()) {
|
||||
return matcher.start();
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -3,26 +3,22 @@ package net.sourceforge.filebot.similarity;
|
||||
|
||||
|
||||
import java.io.File;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.sourceforge.filebot.web.Date;
|
||||
|
||||
|
||||
public class DateMetric implements SimilarityMetric {
|
||||
|
||||
private final DatePattern[] patterns;
|
||||
private final DateMatcher matcher;
|
||||
|
||||
|
||||
public DateMetric() {
|
||||
patterns = new DatePattern[2];
|
||||
this.matcher = new DateMatcher();
|
||||
}
|
||||
|
||||
// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc.
|
||||
patterns[0] = new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 });
|
||||
|
||||
// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc.
|
||||
patterns[1] = new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 });
|
||||
public DateMetric(DateMatcher matcher) {
|
||||
this.matcher = matcher;
|
||||
}
|
||||
|
||||
|
||||
@ -46,49 +42,7 @@ public class DateMetric implements SimilarityMetric {
|
||||
object = ((File) object).getName();
|
||||
}
|
||||
|
||||
return match(object.toString());
|
||||
}
|
||||
|
||||
|
||||
public Date match(CharSequence name) {
|
||||
for (DatePattern pattern : patterns) {
|
||||
Date match = pattern.match(name);
|
||||
|
||||
if (match != null) {
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
protected static class DatePattern {
|
||||
|
||||
protected final Pattern pattern;
|
||||
protected final int[] order;
|
||||
|
||||
|
||||
public DatePattern(String pattern, int[] order) {
|
||||
this.pattern = Pattern.compile(pattern);
|
||||
this.order = order;
|
||||
}
|
||||
|
||||
|
||||
protected Date process(MatchResult match) {
|
||||
return new Date(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2])));
|
||||
}
|
||||
|
||||
|
||||
public Date match(CharSequence name) {
|
||||
Matcher matcher = pattern.matcher(name);
|
||||
|
||||
if (matcher.find()) {
|
||||
return process(matcher);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
return matcher.match(object.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ public class SeasonEpisodeMatcher {
|
||||
patterns[0] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee](\\d{1,3})(?!\\p{Digit})");
|
||||
|
||||
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ...
|
||||
patterns[1] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})(\\d{1,2})[x.](\\d{2,3})(?!\\p{Digit})");
|
||||
patterns[1] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[x.](\\d{2,3})(?!\\p{Digit})");
|
||||
|
||||
// match patterns like 01, 102, 1003 (enclosed in separators)
|
||||
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})([0-1]?\\d?)(\\d{2})(?!\\p{Alnum})") {
|
||||
|
@ -31,6 +31,7 @@ import net.sourceforge.tuned.FileUtilities;
|
||||
public class SeriesNameMatcher {
|
||||
|
||||
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(new SeasonEpisodeFilter(30, 50, -1), true);
|
||||
protected DateMatcher dateMatcher = new DateMatcher();
|
||||
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
||||
|
||||
protected int commonWordSequenceMaxStartIndex;
|
||||
@ -83,12 +84,17 @@ public class SeriesNameMatcher {
|
||||
// match common word sequences (likely series names)
|
||||
SeriesNameCollection whitelist = new SeriesNameCollection();
|
||||
|
||||
// focus chars before the SxE pattern when matching by common word sequence
|
||||
// focus chars before the SxE / Date pattern when matching by common word sequence
|
||||
String[] focus = Arrays.copyOf(names, names.length);
|
||||
for (int i = 0; i < focus.length; i++) {
|
||||
int pos = seasonEpisodeMatcher.find(focus[i], 0);
|
||||
if (pos >= 0) {
|
||||
focus[i] = focus[i].substring(0, pos);
|
||||
int sxePos = seasonEpisodeMatcher.find(focus[i], 0);
|
||||
if (sxePos >= 0) {
|
||||
focus[i] = focus[i].substring(0, sxePos);
|
||||
} else {
|
||||
int datePos = dateMatcher.find(focus[i], 0);
|
||||
if (datePos >= 0) {
|
||||
focus[i] = focus[i].substring(0, datePos);
|
||||
}
|
||||
}
|
||||
}
|
||||
whitelist.addAll(deepMatchAll(focus, threshold));
|
||||
@ -118,8 +124,9 @@ public class SeriesNameMatcher {
|
||||
name = normalize(name);
|
||||
|
||||
Matcher prefix = prefixPattern.matcher(name);
|
||||
int sxePosition = seasonEpisodeMatcher.find(name, prefix.find() ? prefix.end() : 0);
|
||||
int prefixEnd = prefix.find() ? prefix.end() : 0;
|
||||
|
||||
int sxePosition = seasonEpisodeMatcher.find(name, prefixEnd);
|
||||
if (sxePosition > 0) {
|
||||
String hit = name.substring(0, sxePosition).trim();
|
||||
List<SxE> sxe = seasonEpisodeMatcher.match(name.substring(sxePosition));
|
||||
@ -131,7 +138,14 @@ public class SeriesNameMatcher {
|
||||
// require multiple matches, if hit might be a false match
|
||||
thresholdCollection.add(hit);
|
||||
}
|
||||
} else {
|
||||
// try date pattern as fallback
|
||||
int datePosition = dateMatcher.find(name, prefixEnd);
|
||||
if (datePosition > 0) {
|
||||
thresholdCollection.addDirect(name.substring(0, datePosition).trim());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return thresholdCollection;
|
||||
@ -176,14 +190,19 @@ public class SeriesNameMatcher {
|
||||
* @return a substring of the given name that ends before the first occurrence of a season
|
||||
* episode pattern (or, failing that, a date pattern), or null if there is no such pattern
|
||||
*/
|
||||
public String matchBySeasonEpisodePattern(String name) {
|
||||
public String matchByEpisodeIdentifier(String name) {
|
||||
int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
|
||||
|
||||
if (seasonEpisodePosition > 0) {
|
||||
// series name ends at the first season episode pattern
|
||||
return normalize(name.substring(0, seasonEpisodePosition));
|
||||
}
|
||||
|
||||
int datePosition = dateMatcher.find(name, 0);
|
||||
if (datePosition > 0) {
|
||||
// series name ends at the first date pattern
|
||||
return normalize(name.substring(0, datePosition));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -233,7 +233,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
||||
|
||||
// require user input if auto-detection has failed or has been disabled
|
||||
if (episodes.isEmpty()) {
|
||||
String suggestion = new SeriesNameMatcher().matchBySeasonEpisodePattern(getName(files.get(0)));
|
||||
String suggestion = new SeriesNameMatcher().matchByEpisodeIdentifier(getName(files.get(0)));
|
||||
if (suggestion != null) {
|
||||
// clean media info / release group info / etc
|
||||
suggestion = stripReleaseInfo(suggestion);
|
||||
|
@ -34,10 +34,10 @@ public class SeriesNameMatcherTest {
|
||||
|
||||
@Test
|
||||
public void matchBeforeSeasonEpisodePattern() {
|
||||
assertEquals("The Test", matcher.matchBySeasonEpisodePattern("The Test - 1x01"));
|
||||
assertEquals("The Test", matcher.matchByEpisodeIdentifier("The Test - 1x01"));
|
||||
|
||||
// real world test
|
||||
assertEquals("Mushishi", matcher.matchBySeasonEpisodePattern("[niizk]_Mushishi_-_01_-_The_Green_Gathering"));
|
||||
assertEquals("Mushishi", matcher.matchByEpisodeIdentifier("[niizk]_Mushishi_-_01_-_The_Green_Gathering"));
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user