2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
package net.sourceforge.filebot.similarity;
|
|
|
|
|
|
|
|
|
2013-09-11 13:22:00 -04:00
|
|
|
import static java.util.Arrays.*;
|
|
|
|
import static java.util.Collections.*;
|
|
|
|
import static java.util.regex.Pattern.*;
|
2012-02-10 12:14:38 -05:00
|
|
|
|
2013-04-18 06:03:41 -04:00
|
|
|
import java.io.File;
|
2009-01-24 19:08:57 -05:00
|
|
|
import java.util.ArrayList;
|
2009-07-16 08:06:51 -04:00
|
|
|
import java.util.Arrays;
|
2009-07-23 10:25:43 -04:00
|
|
|
import java.util.Collection;
|
2009-01-24 19:08:57 -05:00
|
|
|
import java.util.List;
|
2012-03-17 15:02:04 -04:00
|
|
|
import java.util.Scanner;
|
2009-07-23 10:25:43 -04:00
|
|
|
import java.util.regex.MatchResult;
|
2009-01-24 19:08:57 -05:00
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
|
|
|
|
public class SeasonEpisodeMatcher {
|
|
|
|
|
2012-03-04 19:49:11 -05:00
|
|
|
public static final SeasonEpisodeFilter DEFAULT_SANITY = new SeasonEpisodeFilter(50, 50, 1000);
|
2012-02-19 22:16:42 -05:00
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
private SeasonEpisodePattern[] patterns;
|
2013-04-18 06:03:41 -04:00
|
|
|
private Pattern seasonPattern;
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
|
|
|
public SeasonEpisodeMatcher(SeasonEpisodeFilter sanity, boolean strict) {
|
2012-03-04 19:49:11 -05:00
|
|
|
patterns = new SeasonEpisodePattern[5];
|
|
|
|
|
|
|
|
// match patterns like Season 01 Episode 02, ...
|
2012-05-29 23:02:26 -04:00
|
|
|
patterns[0] = new SeasonEpisodePattern(null, "(?<!\\p{Alnum})(?i:season|series)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?i:episode)[^\\p{Alnum}]{0,3}(\\d{1,4})[^\\p{Alnum}]{0,3}(?!\\p{Digit})");
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2012-03-17 15:02:04 -04:00
|
|
|
// match patterns like S01E01, s01e02, ... [s01]_[e02], s01.e02, s01e02a, s2010e01 ... s01e01-02-03-04, [s01]_[e01-02-03-04] ...
|
2013-02-01 03:12:15 -05:00
|
|
|
patterns[1] = new SeasonEpisodePattern(null, "(?<!\\p{Digit})[Ss](\\d{1,2}|\\d{4})[^\\p{Alnum}]{0,3}[Ee][Pp]?(((?<=[^._ ])[Ee]?[Pp]?\\d{1,3}(\\D|$))+)") {
|
2012-03-17 15:02:04 -04:00
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Collection<SxE> process(MatchResult match) {
|
|
|
|
List<SxE> matches = new ArrayList<SxE>(2);
|
|
|
|
Scanner epno = new Scanner(match.group(2)).useDelimiter("\\D+");
|
|
|
|
while (epno.hasNext()) {
|
|
|
|
matches.add(new SxE(match.group(1), epno.next()));
|
|
|
|
}
|
|
|
|
return matches;
|
|
|
|
}
|
|
|
|
};
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2012-03-17 15:02:04 -04:00
|
|
|
// match patterns like 1x01, 1.02, ..., 1x01a, 10x01, 10.02, ... 1x01-02-03-04, 1x01x02x03x04 ...
|
2013-01-03 05:02:15 -05:00
|
|
|
patterns[2] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum}|\\d{4}[.])(\\d{1,2})[xe.](((?<=[^._ ])\\d{2,3}(\\D|$))+)") {
|
2012-03-17 15:02:04 -04:00
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Collection<SxE> process(MatchResult match) {
|
|
|
|
List<SxE> matches = new ArrayList<SxE>(2);
|
|
|
|
Scanner epno = new Scanner(match.group(2)).useDelimiter("\\D+");
|
|
|
|
while (epno.hasNext()) {
|
|
|
|
matches.add(new SxE(match.group(1), epno.next()));
|
|
|
|
}
|
|
|
|
return matches;
|
|
|
|
}
|
|
|
|
};
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2012-02-10 12:14:38 -05:00
|
|
|
// match patterns like ep1, ep.1, ...
|
2012-06-28 00:04:37 -04:00
|
|
|
patterns[3] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})(?i:e|ep|episode)[^\\p{Alnum}]{0,3}(\\d{1,3})(?!\\p{Digit})") {
|
2012-02-10 12:14:38 -05:00
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Collection<SxE> process(MatchResult match) {
|
|
|
|
// regex doesn't match season
|
|
|
|
return singleton(new SxE(null, match.group(1)));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
// match patterns like 01, 102, 1003 (enclosed in separators)
|
2013-08-31 02:51:30 -04:00
|
|
|
patterns[4] = new SeasonEpisodePattern(sanity, "(?<!\\p{Alnum})([0-2]?\\d?)(\\d{2})(?!\\p{Alnum})") {
|
2009-07-23 10:25:43 -04:00
|
|
|
|
|
|
|
@Override
|
|
|
|
protected Collection<SxE> process(MatchResult match) {
|
|
|
|
// interpret match as season and episode
|
|
|
|
SxE seasonEpisode = new SxE(match.group(1), match.group(2));
|
|
|
|
|
|
|
|
// interpret match as episode number only
|
2010-11-09 03:04:12 -05:00
|
|
|
SxE absoluteEpisode = new SxE(null, match.group(1) + match.group(2));
|
2009-07-23 10:25:43 -04:00
|
|
|
|
2009-08-06 07:51:30 -04:00
|
|
|
// return both matches, unless they are one and the same
|
2012-02-10 12:14:38 -05:00
|
|
|
return seasonEpisode.equals(absoluteEpisode) ? singleton(seasonEpisode) : asList(seasonEpisode, absoluteEpisode);
|
2009-07-23 10:25:43 -04:00
|
|
|
}
|
|
|
|
};
|
2011-12-28 19:41:27 -05:00
|
|
|
|
|
|
|
// only use S00E00 and SxE pattern in strict mode
|
|
|
|
if (strict) {
|
2012-03-04 19:49:11 -05:00
|
|
|
patterns = new SeasonEpisodePattern[] { patterns[0], patterns[1], patterns[2] };
|
2011-12-28 19:41:27 -05:00
|
|
|
}
|
2013-04-18 06:03:41 -04:00
|
|
|
|
|
|
|
// season folder pattern for complementing partial sxe info from filename
|
2013-04-19 05:19:49 -04:00
|
|
|
seasonPattern = compile("Season[-._ ]?(\\d{1,2})", CASE_INSENSITIVE | UNICODE_CASE);
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
/**
|
|
|
|
* Try to get season and episode numbers for the given string.
|
|
|
|
*
|
|
|
|
* @param name match this string against the a set of know patterns
|
|
|
|
* @return the matches returned by the first pattern that returns any matches for this
|
|
|
|
* string, or null if no pattern returned any matches
|
|
|
|
*/
|
|
|
|
public List<SxE> match(CharSequence name) {
|
|
|
|
for (SeasonEpisodePattern pattern : patterns) {
|
|
|
|
List<SxE> match = pattern.match(name);
|
|
|
|
|
|
|
|
if (!match.isEmpty()) {
|
|
|
|
// current pattern did match
|
|
|
|
return match;
|
|
|
|
}
|
|
|
|
}
|
2013-04-18 06:03:41 -04:00
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public List<SxE> match(File file) {
|
|
|
|
for (SeasonEpisodePattern pattern : patterns) {
|
|
|
|
List<SxE> match = pattern.match(file.getName());
|
|
|
|
|
|
|
|
if (!match.isEmpty()) {
|
|
|
|
// current pattern did match
|
|
|
|
for (int i = 0; i < match.size(); i++) {
|
|
|
|
if (match.get(i).season < 0) {
|
|
|
|
Matcher sm = seasonPattern.matcher(file.getPath());
|
|
|
|
if (sm.find()) {
|
|
|
|
match.set(i, new SxE(Integer.parseInt(sm.group(1)), match.get(i).episode));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return match;
|
|
|
|
}
|
|
|
|
}
|
2009-01-24 19:08:57 -05:00
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-07-23 10:25:43 -04:00
|
|
|
public int find(CharSequence name, int fromIndex) {
|
2009-01-24 19:08:57 -05:00
|
|
|
for (SeasonEpisodePattern pattern : patterns) {
|
2009-07-23 10:25:43 -04:00
|
|
|
int index = pattern.find(name, fromIndex);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
if (index >= 0) {
|
|
|
|
// current pattern did match
|
|
|
|
return index;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-07-29 16:31:08 -04:00
|
|
|
public Matcher matcher(CharSequence name) {
|
|
|
|
for (SeasonEpisodePattern pattern : patterns) {
|
|
|
|
Matcher matcher = pattern.matcher(name);
|
|
|
|
|
|
|
|
// check if current pattern matches
|
|
|
|
if (matcher.find()) {
|
|
|
|
// reset matcher state
|
|
|
|
return matcher.reset();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
public static class SxE {
|
|
|
|
|
2009-05-03 11:21:04 -04:00
|
|
|
public static final int UNDEFINED = -1;
|
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
public final int season;
|
|
|
|
public final int episode;
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2010-10-24 12:33:38 -04:00
|
|
|
public SxE(Integer season, Integer episode) {
|
|
|
|
this.season = season != null ? season : UNDEFINED;
|
|
|
|
this.episode = episode != null ? episode : UNDEFINED;
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
public SxE(String season, String episode) {
|
|
|
|
this.season = parse(season);
|
|
|
|
this.episode = parse(episode);
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
protected int parse(String number) {
|
2009-05-03 11:21:04 -04:00
|
|
|
try {
|
|
|
|
return Integer.parseInt(number);
|
|
|
|
} catch (Exception e) {
|
|
|
|
return UNDEFINED;
|
|
|
|
}
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
|
|
|
public boolean equals(Object object) {
|
|
|
|
if (object instanceof SxE) {
|
|
|
|
SxE other = (SxE) object;
|
|
|
|
return this.season == other.season && this.episode == other.episode;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-06-13 05:53:48 -04:00
|
|
|
@Override
|
|
|
|
public int hashCode() {
|
2009-07-16 08:06:51 -04:00
|
|
|
return Arrays.hashCode(new Object[] { season, episode });
|
2009-06-13 05:53:48 -04:00
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
@Override
|
|
|
|
public String toString() {
|
2009-08-06 07:51:30 -04:00
|
|
|
return season >= 0 ? String.format("%dx%02d", season, episode) : String.format("%02d", episode);
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2011-11-22 09:44:54 -05:00
|
|
|
public static class SeasonEpisodeFilter {
|
|
|
|
|
|
|
|
public final int seasonLimit;
|
|
|
|
public final int seasonEpisodeLimit;
|
|
|
|
public final int absoluteEpisodeLimit;
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2011-11-22 09:44:54 -05:00
|
|
|
public SeasonEpisodeFilter(int seasonLimit, int seasonEpisodeLimit, int absoluteEpisodeLimit) {
|
|
|
|
this.seasonLimit = seasonLimit;
|
|
|
|
this.seasonEpisodeLimit = seasonEpisodeLimit;
|
|
|
|
this.absoluteEpisodeLimit = absoluteEpisodeLimit;
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2011-11-22 09:44:54 -05:00
|
|
|
boolean filter(SxE sxe) {
|
|
|
|
return (sxe.season >= 0 && sxe.season < seasonLimit && sxe.episode < seasonEpisodeLimit) || (sxe.season < 0 && sxe.episode < absoluteEpisodeLimit);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2011-11-22 09:44:54 -05:00
|
|
|
public static class SeasonEpisodePattern {
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
protected final Pattern pattern;
|
2011-11-22 09:44:54 -05:00
|
|
|
protected final SeasonEpisodeFilter sanity;
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2011-11-22 09:44:54 -05:00
|
|
|
public SeasonEpisodePattern(SeasonEpisodeFilter sanity, String pattern) {
|
2009-07-23 10:25:43 -04:00
|
|
|
this.pattern = Pattern.compile(pattern);
|
2011-11-22 09:44:54 -05:00
|
|
|
this.sanity = sanity;
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-07-29 16:31:08 -04:00
|
|
|
public Matcher matcher(CharSequence name) {
|
|
|
|
return pattern.matcher(name);
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-07-23 10:25:43 -04:00
|
|
|
protected Collection<SxE> process(MatchResult match) {
|
2012-02-10 12:14:38 -05:00
|
|
|
return singleton(new SxE(match.group(1), match.group(2)));
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-01-24 19:08:57 -05:00
|
|
|
public List<SxE> match(CharSequence name) {
|
2009-07-23 10:25:43 -04:00
|
|
|
// name will probably contain no more than two matches
|
|
|
|
List<SxE> matches = new ArrayList<SxE>(2);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2009-07-29 16:31:08 -04:00
|
|
|
Matcher matcher = matcher(name);
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
while (matcher.find()) {
|
2011-11-22 09:44:54 -05:00
|
|
|
for (SxE value : process(matcher)) {
|
|
|
|
if (sanity == null || sanity.filter(value)) {
|
|
|
|
matches.add(value);
|
|
|
|
}
|
|
|
|
}
|
2009-01-24 19:08:57 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return matches;
|
|
|
|
}
|
|
|
|
|
2011-12-28 19:41:27 -05:00
|
|
|
|
2009-07-23 10:25:43 -04:00
|
|
|
public int find(CharSequence name, int fromIndex) {
|
2011-11-22 09:44:54 -05:00
|
|
|
Matcher matcher = matcher(name).region(fromIndex, name.length());
|
2009-01-24 19:08:57 -05:00
|
|
|
|
2011-11-22 09:44:54 -05:00
|
|
|
while (matcher.find()) {
|
|
|
|
for (SxE value : process(matcher)) {
|
|
|
|
if (sanity == null || sanity.filter(value)) {
|
|
|
|
return matcher.start();
|
|
|
|
}
|
|
|
|
}
|
2009-07-29 16:31:08 -04:00
|
|
|
}
|
2009-01-24 19:08:57 -05:00
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|