mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-11 05:48:01 -05:00
Optimize DateMatcher
This commit is contained in:
parent
3b79ef9e39
commit
ffa8b021e0
@ -48,7 +48,6 @@ import net.filebot.archive.Archive;
|
||||
import net.filebot.format.MediaBindingBean;
|
||||
import net.filebot.similarity.CommonSequenceMatcher;
|
||||
import net.filebot.similarity.DateMatcher;
|
||||
import net.filebot.similarity.DateMetric;
|
||||
import net.filebot.similarity.EpisodeMetrics;
|
||||
import net.filebot.similarity.MetricAvg;
|
||||
import net.filebot.similarity.NameSimilarityMetric;
|
||||
@ -118,11 +117,16 @@ public class MediaDetection {
|
||||
|
||||
private static final SeasonEpisodeMatcher seasonEpisodeMatcherStrict = new SmartSeasonEpisodeMatcher(true);
|
||||
private static final SeasonEpisodeMatcher seasonEpisodeMatcherNonStrict = new SmartSeasonEpisodeMatcher(false);
|
||||
private static final DateMatcher dateMatcher = new DateMatcher(Locale.ROOT, DateMatcher.DEFAULT_SANITY);
|
||||
|
||||
public static SeasonEpisodeMatcher getSeasonEpisodeMatcher(boolean strict) {
|
||||
return strict ? seasonEpisodeMatcherStrict : seasonEpisodeMatcherNonStrict;
|
||||
}
|
||||
|
||||
public static DateMatcher getDateMatcher() {
|
||||
return dateMatcher;
|
||||
}
|
||||
|
||||
public static boolean isEpisode(String name, boolean strict) {
|
||||
return parseEpisodeNumber(name, strict) != null || parseDate(name) != null;
|
||||
}
|
||||
@ -136,7 +140,10 @@ public class MediaDetection {
|
||||
}
|
||||
|
||||
public static SimpleDate parseDate(Object object) {
|
||||
return new DateMetric().parse(object);
|
||||
if (object instanceof File) {
|
||||
return getDateMatcher().match((File) object);
|
||||
}
|
||||
return getDateMatcher().match(object.toString());
|
||||
}
|
||||
|
||||
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale, boolean useSeriesIndex, boolean useAnimeIndex) throws Exception {
|
||||
@ -264,7 +271,7 @@ public class MediaDetection {
|
||||
|
||||
// then Date pattern
|
||||
if (match == null) {
|
||||
match = new DateMatcher().match(name);
|
||||
match = getDateMatcher().match(name);
|
||||
}
|
||||
|
||||
// check SxE non-strict
|
||||
@ -744,7 +751,7 @@ public class MediaDetection {
|
||||
}
|
||||
|
||||
public static List<Integer> parseMovieYear(String name) {
|
||||
return matchIntegers(name).stream().filter(year -> 1950 < year && year < 2050).collect(toList());
|
||||
return matchIntegers(name).stream().filter(DateMatcher.DEFAULT_SANITY::acceptYear).collect(toList());
|
||||
}
|
||||
|
||||
public static String reduceMovieName(String name, boolean strict) throws IOException {
|
||||
|
@ -1,47 +1,84 @@
|
||||
package net.filebot.similarity;
|
||||
|
||||
import static java.util.Arrays.*;
|
||||
import static java.util.stream.Collectors.*;
|
||||
import static net.filebot.util.FileUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDate;
|
||||
import java.time.Month;
|
||||
import java.time.chrono.ChronoLocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import net.filebot.web.SimpleDate;
|
||||
|
||||
public class DateMatcher {
|
||||
|
||||
public static final DateFilter DEFAULT_SANITY = new DateFilter(LocalDate.of(1930, Month.JANUARY, 1), LocalDate.of(2050, Month.JANUARY, 1));
|
||||
|
||||
private final DatePattern[] patterns;
|
||||
|
||||
public DateMatcher() {
|
||||
patterns = new DatePattern[7];
|
||||
public DateMatcher(Locale locale, DateFilter sanity) {
|
||||
// generate default date format patterns
|
||||
List<String> format = new ArrayList<String>(7);
|
||||
|
||||
// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc
|
||||
patterns[0] = new NumericDatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 });
|
||||
format.add("y M d");
|
||||
|
||||
// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc
|
||||
patterns[1] = new NumericDatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 });
|
||||
format.add("d M y");
|
||||
|
||||
// match yyyy.MMMMM.dd patterns like 2015.October.05
|
||||
patterns[2] = new DateFormatPattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](?i:January|February|March|April|May|June|July|August|September|October|November|December)[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", "yyyy MMMMM dd");
|
||||
format.add("y MMMM d");
|
||||
|
||||
// match yyyy.MMM.dd patterns like 2015.Oct.06
|
||||
patterns[3] = new DateFormatPattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](?i:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", "yyyy MMM dd");
|
||||
// match yyyy.MMM.dd patterns like 2015.Oct.6
|
||||
format.add("y MMM d");
|
||||
|
||||
// match dd.MMMMM.yyyy patterns like 25 July 2014
|
||||
patterns[4] = new DateFormatPattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](?i:January|February|March|April|May|June|July|August|September|October|November|December)[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", "dd MMMMM yyyy");
|
||||
format.add("d MMMM y");
|
||||
|
||||
// match dd.MMM.yyyy patterns like 8 Sep 2015
|
||||
patterns[5] = new DateFormatPattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](?i:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", "dd MMM yyyy");
|
||||
format.add("d MMM y");
|
||||
|
||||
// match yyyymmdd patterns like 20140408
|
||||
patterns[6] = new DateFormatPattern("(?<!\\p{Alnum})(\\d{8})(?!\\p{Alnum})", "yyyyMMdd");
|
||||
format.add("yyyyMMdd");
|
||||
|
||||
this.patterns = compile(format, locale, sanity);
|
||||
}
|
||||
|
||||
public DateMatcher(DatePattern... patterns) {
|
||||
this.patterns = patterns;
|
||||
protected DatePattern[] compile(List<String> dateFormat, Locale locale, DateFilter sanity) {
|
||||
return dateFormat.stream().map(format -> {
|
||||
String pattern = stream(format.split(DateFormatPattern.DELIMITER)).map(this::getPatternGroup).collect(joining("[^\\p{Alnum}]", "(?<!\\p{Alnum})", "(?!\\p{Alnum})"));
|
||||
return new DateFormatPattern(pattern, format, locale, sanity);
|
||||
}).toArray(DateFormatPattern[]::new);
|
||||
}
|
||||
|
||||
protected String getPatternGroup(String token) {
|
||||
switch (token) {
|
||||
case "y":
|
||||
return "(\\d{4})";
|
||||
case "M":
|
||||
return "(\\d{1,2})";
|
||||
case "d":
|
||||
return "(\\d{1,2})";
|
||||
case "yyyyMMdd":
|
||||
return "(\\d{8})";
|
||||
case "MMMM":
|
||||
return "(January|February|March|April|May|June|July|August|September|October|November|December)";
|
||||
case "MMM":
|
||||
return "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
|
||||
default:
|
||||
throw new IllegalArgumentException(token);
|
||||
}
|
||||
}
|
||||
|
||||
public SimpleDate match(CharSequence seq) {
|
||||
@ -52,7 +89,6 @@ public class DateMatcher {
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -64,7 +100,6 @@ public class DateMatcher {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -89,7 +124,7 @@ public class DateMatcher {
|
||||
return tail;
|
||||
}
|
||||
|
||||
private static interface DatePattern {
|
||||
public static interface DatePattern {
|
||||
|
||||
public SimpleDate match(CharSequence seq);
|
||||
|
||||
@ -97,60 +132,34 @@ public class DateMatcher {
|
||||
|
||||
}
|
||||
|
||||
private static class NumericDatePattern implements DatePattern {
|
||||
public static class DateFormatPattern implements DatePattern {
|
||||
|
||||
public static final String DELIMITER = " ";
|
||||
|
||||
protected final Pattern pattern;
|
||||
protected final int[] order;
|
||||
protected final DateTimeFormatter format;
|
||||
protected final DateFilter sanity;
|
||||
|
||||
public NumericDatePattern(String pattern, int[] order) {
|
||||
this.pattern = Pattern.compile(pattern);
|
||||
this.order = order;
|
||||
public DateFormatPattern(String pattern, String format, Locale locale, DateFilter sanity) {
|
||||
this.pattern = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
|
||||
this.format = DateTimeFormatter.ofPattern(format, locale);
|
||||
this.sanity = sanity;
|
||||
}
|
||||
|
||||
protected SimpleDate process(MatchResult match) {
|
||||
return new SimpleDate(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2])));
|
||||
}
|
||||
try {
|
||||
String dateString = IntStream.rangeClosed(1, match.groupCount()).mapToObj(match::group).collect(joining(DELIMITER));
|
||||
LocalDate date = LocalDate.parse(dateString, format);
|
||||
|
||||
@Override
|
||||
public SimpleDate match(CharSequence seq) {
|
||||
Matcher matcher = pattern.matcher(seq);
|
||||
|
||||
if (matcher.find()) {
|
||||
return process(matcher);
|
||||
if (sanity == null || sanity.test(date)) {
|
||||
return new SimpleDate(date.getYear(), date.getMonthValue(), date.getDayOfMonth());
|
||||
}
|
||||
} catch (DateTimeParseException e) {
|
||||
// date is invalid
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int find(CharSequence seq, int fromIndex) {
|
||||
Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length());
|
||||
|
||||
if (matcher.find()) {
|
||||
return matcher.start();
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class DateFormatPattern implements DatePattern {
|
||||
|
||||
protected final Pattern space = Pattern.compile("[^\\p{Alnum}]+");
|
||||
|
||||
protected final Pattern pattern;
|
||||
protected final String dateFormat;
|
||||
|
||||
public DateFormatPattern(String pattern, String dateFormat) {
|
||||
this.pattern = Pattern.compile(pattern);
|
||||
this.dateFormat = dateFormat;
|
||||
}
|
||||
|
||||
protected SimpleDate process(MatchResult match) {
|
||||
return SimpleDate.parse(space.matcher(match.group()).replaceAll(" "), dateFormat);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimpleDate match(CharSequence seq) {
|
||||
Matcher matcher = pattern.matcher(seq);
|
||||
@ -158,7 +167,6 @@ public class DateMatcher {
|
||||
if (matcher.find()) {
|
||||
return process(matcher);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -171,10 +179,34 @@ public class DateMatcher {
|
||||
return matcher.start();
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class DateFilter implements Predicate<ChronoLocalDate> {
|
||||
|
||||
public final ChronoLocalDate lowerBound;
|
||||
public final ChronoLocalDate upperBound;
|
||||
|
||||
public DateFilter(ChronoLocalDate lowerBound, ChronoLocalDate upperBound) {
|
||||
this.lowerBound = lowerBound;
|
||||
this.upperBound = upperBound;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean test(ChronoLocalDate date) {
|
||||
return date.isAfter(lowerBound) && date.isBefore(upperBound);
|
||||
}
|
||||
|
||||
public boolean acceptDate(int year, int month, int day) {
|
||||
return test(LocalDate.of(year, month, day));
|
||||
}
|
||||
|
||||
public boolean acceptYear(int year) {
|
||||
return test(LocalDate.of(year, 1, 1));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -8,10 +8,6 @@ public class DateMetric implements SimilarityMetric {
|
||||
|
||||
private final DateMatcher matcher;
|
||||
|
||||
public DateMetric() {
|
||||
this.matcher = new DateMatcher();
|
||||
}
|
||||
|
||||
public DateMetric(DateMatcher matcher) {
|
||||
this.matcher = matcher;
|
||||
}
|
||||
@ -33,7 +29,6 @@ public class DateMetric implements SimilarityMetric {
|
||||
if (object instanceof File) {
|
||||
return matcher.match((File) object);
|
||||
}
|
||||
|
||||
return matcher.match(object.toString());
|
||||
}
|
||||
|
||||
|
@ -83,7 +83,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
}),
|
||||
|
||||
// Match episode airdate
|
||||
AirDate(new DateMetric() {
|
||||
AirDate(new DateMetric(getDateMatcher()) {
|
||||
|
||||
private final Map<Object, SimpleDate> transformCache = synchronizedMap(new HashMap<Object, SimpleDate>(64, 4));
|
||||
|
||||
|
@ -43,7 +43,7 @@ public class SeriesNameMatcher {
|
||||
|
||||
public SeriesNameMatcher(Locale locale, boolean strict) {
|
||||
seasonEpisodeMatcher = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
|
||||
dateMatcher = new DateMatcher();
|
||||
dateMatcher = new DateMatcher(locale, DateMatcher.DEFAULT_SANITY);
|
||||
nameSimilarityMetric = new NameSimilarityMetric();
|
||||
|
||||
commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3, true) {
|
||||
|
@ -50,7 +50,7 @@ public class LanguageComboBox extends JComboBox {
|
||||
|
||||
// guess favorite languages
|
||||
if (getModel().favorites().isEmpty()) {
|
||||
for (Locale locale : new Locale[] { Locale.getDefault(), Locale.ENGLISH }) {
|
||||
for (Locale locale : new Locale[] { Locale.ENGLISH, Locale.getDefault() }) {
|
||||
getModel().favorites().add(getLanguage(locale.getLanguage()));
|
||||
}
|
||||
}
|
||||
|
@ -11,6 +11,10 @@ import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.time.temporal.ChronoField;
|
||||
import java.time.temporal.TemporalAccessor;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.EnumMap;
|
||||
@ -22,6 +26,8 @@ import java.util.Map.Entry;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Function;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -211,18 +217,16 @@ public class OMDbClient implements MovieIdentificationService {
|
||||
fields.put(MovieProperty.poster_path, data.get("poster"));
|
||||
|
||||
// convert release date to yyyy-MM-dd
|
||||
SimpleDate released = SimpleDate.parse(data.get("released"), "dd MMM yyyy");
|
||||
if (released != null) {
|
||||
fields.put(MovieProperty.release_date, released.format("yyyy-MM-dd"));
|
||||
} else {
|
||||
SimpleDate year = SimpleDate.parse(data.get("year"), "yyyy");
|
||||
if (year != null) {
|
||||
fields.put(MovieProperty.release_date, year.format("yyyy-MM-dd"));
|
||||
}
|
||||
SimpleDate release = parsePartialDate(data.get("released"), "d MMM yyyy");
|
||||
if (release == null) {
|
||||
release = parsePartialDate(data.get("released"), "yyyy");
|
||||
}
|
||||
if (release != null) {
|
||||
fields.put(MovieProperty.release_date, release.toString());
|
||||
}
|
||||
|
||||
// convert lists
|
||||
Pattern delim = Pattern.compile(",");
|
||||
|
||||
List<String> genres = split(delim, data.get("genre"), String::toString);
|
||||
List<String> languages = split(delim, data.get("language"), String::toString);
|
||||
|
||||
@ -234,10 +238,29 @@ public class OMDbClient implements MovieIdentificationService {
|
||||
return new MovieInfo(fields, emptyList(), genres, emptyMap(), languages, emptyList(), emptyList(), actors, emptyList());
|
||||
}
|
||||
|
||||
private SimpleDate parsePartialDate(String value, String format) {
|
||||
if (value != null && value.length() > 0) {
|
||||
try {
|
||||
TemporalAccessor f = DateTimeFormatter.ofPattern(format, Locale.ENGLISH).parse(value);
|
||||
if (f.isSupported(ChronoField.YEAR)) {
|
||||
if (f.isSupported(ChronoField.MONTH_OF_YEAR) && f.isSupported(ChronoField.DAY_OF_MONTH)) {
|
||||
return new SimpleDate(f.get(ChronoField.YEAR), f.get(ChronoField.MONTH_OF_YEAR), f.get(ChronoField.DAY_OF_MONTH));
|
||||
} else {
|
||||
return new SimpleDate(f.get(ChronoField.YEAR), 1, 1);
|
||||
}
|
||||
}
|
||||
} catch (DateTimeParseException e) {
|
||||
Logger.getLogger(OMDbClient.class.getName()).log(Level.WARNING, String.format("Bad date: %s: %s", value, e.getMessage()));
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private <T> List<T> split(Pattern regex, String value, Function<String, T> toObject) {
|
||||
if (value == null || value.isEmpty())
|
||||
return emptyList();
|
||||
|
||||
return regex.splitAsStream(value).map(String::trim).filter(s -> !s.equals("N/A")).map(toObject).collect(toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
14
test/net/filebot/media/MediaDetectionTest.java
Normal file
14
test/net/filebot/media/MediaDetectionTest.java
Normal file
@ -0,0 +1,14 @@
|
||||
package net.filebot.media;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class MediaDetectionTest {
|
||||
|
||||
@Test
|
||||
public void parseMovieYear() {
|
||||
assertEquals("[2009]", MediaDetection.parseMovieYear("Avatar 2009 2100").toString());
|
||||
assertEquals("[1955]", MediaDetection.parseMovieYear("1898 Sissi 1955").toString());
|
||||
}
|
||||
}
|
38
test/net/filebot/similarity/DateMatcherTest.java
Normal file
38
test/net/filebot/similarity/DateMatcherTest.java
Normal file
@ -0,0 +1,38 @@
|
||||
package net.filebot.similarity;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class DateMatcherTest {
|
||||
|
||||
DateMatcher m = new DateMatcher(Locale.ROOT, DateMatcher.DEFAULT_SANITY);
|
||||
|
||||
@Test
|
||||
public void parse() {
|
||||
assertEquals("2010-10-24", m.match("2010-10-24").toString());
|
||||
assertEquals("2009-06-01", m.match("2009/6/1").toString());
|
||||
assertEquals("2010-01-01", m.match("1.1.2010").toString());
|
||||
assertEquals("2010-06-01", m.match("01/06/2010").toString());
|
||||
assertEquals("2015-10-05", m.match("2015.October.05").toString());
|
||||
assertEquals("2015-10-06", m.match("2015.Oct.6").toString());
|
||||
assertEquals("2014-07-25", m.match("25 July 2014").toString());
|
||||
assertEquals("2015-09-08", m.match("8 Sep 2015").toString());
|
||||
assertEquals("2014-04-08", m.match("20140408").toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseIllegal() {
|
||||
assertEquals(null, m.match("2000-01-32"));
|
||||
assertEquals(null, m.match("123456789"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void sanity() {
|
||||
assertEquals(null, m.match("1911-01-01")); // too low
|
||||
assertEquals(null, m.match("2099-01-01")); // too high
|
||||
}
|
||||
|
||||
}
|
@ -1,16 +1,14 @@
|
||||
|
||||
package net.filebot.similarity;
|
||||
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import org.junit.Test;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class DateMetricTest {
|
||||
|
||||
private static DateMetric metric = new DateMetric();
|
||||
|
||||
DateMetric metric = new DateMetric(new DateMatcher(Locale.ROOT, DateMatcher.DEFAULT_SANITY));
|
||||
|
||||
@Test
|
||||
public void getSimilarity() {
|
||||
|
Loading…
Reference in New Issue
Block a user