filebot/source/net/filebot/similarity/EpisodeMetrics.java

868 lines
25 KiB
Java
Raw Normal View History

2014-04-19 02:30:29 -04:00
package net.filebot.similarity;
import static java.util.Arrays.*;
2013-09-11 13:22:00 -04:00
import static java.util.Collections.*;
import static java.util.regex.Pattern.*;
import static java.util.stream.Collectors.*;
import static net.filebot.Logging.*;
import static net.filebot.media.MediaDetection.*;
2016-03-27 09:52:59 -04:00
import static net.filebot.media.XattrMetaInfo.*;
2014-04-19 02:30:29 -04:00
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import java.io.File;
import java.time.Instant;
2016-02-22 16:29:26 -05:00
import java.time.LocalDate;
import java.time.temporal.ChronoUnit;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
2019-02-05 03:10:36 -05:00
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import com.ibm.icu.text.Transliterator;
import net.filebot.media.MediaCharacteristics;
import net.filebot.media.MediaCharacteristicsParser;
2014-04-19 02:30:29 -04:00
import net.filebot.media.SmartSeasonEpisodeMatcher;
import net.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.filebot.vfs.FileInfo;
import net.filebot.web.Episode;
import net.filebot.web.EpisodeFormat;
import net.filebot.web.Movie;
import net.filebot.web.SeriesInfo;
2014-06-24 06:59:00 -04:00
import net.filebot.web.SimpleDate;
public class EpisodeMetrics {
// Match by season / episode numbers
public final SimilarityMetric SeasonEpisode = new SeasonEpisodeMetric(new SmartSeasonEpisodeMatcher(null, false)) {
2019-02-05 03:10:36 -05:00
private final Map<Object, Collection<SxE>> cache = synchronizedMap(new HashMap<>(64, 4));
@Override
protected Collection<SxE> parse(Object object) {
// SxE sets for Episode objects cannot be cached because the same Episode (by ID) may have different episode numbers depending on the order (e.g. Airdate VS DVD order)
if (object instanceof Episode) {
Episode episode = (Episode) object;
return parse(episode);
}
if (object instanceof Movie) {
return emptySet();
}
2019-02-05 03:10:36 -05:00
return cache.computeIfAbsent(object, o -> {
Collection<SxE> sxe = super.parse(o);
return sxe == null ? emptySet() : sxe;
});
2016-08-17 03:23:09 -04:00
}
2016-08-17 03:23:09 -04:00
private Set<SxE> parse(Episode e) {
// get SxE from episode, both SxE for season/episode numbering and SxE for absolute episode numbering
Set<SxE> sxe = new HashSet<SxE>(2);
// default SxE numbering
if (e.getEpisode() != null) {
sxe.add(new SxE(e.getSeason(), e.getEpisode()));
// absolute numbering
2016-08-17 03:23:09 -04:00
if (e.getAbsolute() != null) {
sxe.add(new SxE(null, e.getAbsolute()));
}
2016-08-17 03:23:09 -04:00
} else {
// 0xSpecial numbering
2016-08-17 03:23:09 -04:00
if (e.getSpecial() != null) {
sxe.add(new SxE(0, e.getSpecial()));
}
}
2016-08-17 03:23:09 -04:00
return sxe;
}
2016-08-17 03:23:09 -04:00
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "SeasonEpisode";
}
};
// Match episode airdate
public final SimilarityMetric AirDate = new DateMetric(getDateMatcher()) {
2019-02-05 03:10:36 -05:00
private final Map<Object, Optional<SimpleDate>> cache = synchronizedMap(new HashMap<>(64, 4));
@Override
public SimpleDate parse(Object object) {
if (object instanceof Episode) {
Episode episode = (Episode) object;
return episode.getAirdate();
}
2017-02-18 11:41:36 -05:00
if (object instanceof Movie) {
return null;
}
2019-02-05 03:10:36 -05:00
return cache.computeIfAbsent(object, o -> {
return Optional.ofNullable(super.parse(o));
}).orElse(null);
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "AirDate";
}
};
// Match by episode/movie title
public final SimilarityMetric Title = new SubstringMetric() {
@Override
protected String normalize(Object object) {
if (object instanceof Episode) {
Episode e = (Episode) object;
// don't use title for matching if title equals series name
if (e.getTitle() != null) {
String title = normalizeObject(removeTrailingBrackets(e.getTitle()));
if (title.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(title)) {
return title;
}
}
}
if (object instanceof Movie) {
2014-02-27 14:48:31 -05:00
return normalizeObject(((Movie) object).getName());
}
2014-02-27 14:48:31 -05:00
String s = normalizeObject(object);
return s.length() >= 4 ? s : null; // only consider long enough strings to avoid false matches
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "Title";
}
};
// Match by SxE and airdate: combines the SeasonEpisode and AirDate metrics (in that order) in a MetricCascade
// NOTE(review): the exact cascade semantics are defined in MetricCascade, which is not part of this file
public final SimilarityMetric EpisodeIdentifier = new MetricCascade(SeasonEpisode, AirDate);
// Advanced episode <-> file matching Lv1: cascade of the SeasonEpisode, AirDate and Title metrics (in that order)
public final SimilarityMetric EpisodeFunnel = new MetricCascade(SeasonEpisode, AirDate, Title);
// Advanced episode <-> file matching Lv2
public final SimilarityMetric EpisodeBalancer = new SimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
float sxe = EpisodeIdentifier.getSimilarity(o1, o2);
float title = sxe < 1 ? Title.getSimilarity(o1, o2) : 1; // if SxE matches then boost score as if it was a title match as well
// account for misleading SxE patterns in the episode title
if (sxe < 0 && title == 1 && EpisodeIdentifier.getSimilarity(getTitle(o1), getTitle(o2)) == 1) {
sxe = 1;
title = 0;
}
2013-09-27 05:08:20 -04:00
// allow title to override SxE only if series name also is a good match
if (title == 1 && SeriesName.getSimilarity(o1, o2) < 0.5f) {
title = 0;
}
// 1:SxE && Title, 2:SxE
2016-03-20 16:25:59 -04:00
return (float) ((Math.max(sxe, 0) * title) + (Math.floor(sxe) / 10));
}
public Object getTitle(Object o) {
if (o instanceof Episode) {
Episode e = (Episode) o;
return e.getSeriesName() + " " + e.getTitle();
}
return o;
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "EpisodeBalancer";
}
};
// Match series title and episode title against folder structure and file name
public final SimilarityMetric SubstringFields = new SubstringMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
2011-11-08 13:26:54 -05:00
String[] f1 = normalize(fields(o1));
String[] f2 = normalize(fields(o2));
// match all fields and average similarity
double sum = 0;
for (int i = 0; i < f1.length; i++) {
for (int j = 0; j < f2.length; j++) {
float f = super.getSimilarity(f1[i], f2[j]);
if (f > 0) {
// 2-sqrt(x) from 0 to 1
double multiplier = 2 - Math.sqrt((double) (i + j) / (f1.length + f2.length));
// bonus points for primary matches (e.g. primary title matches filename > alias title matches folder path)
sum += f * multiplier;
}
}
}
sum /= f1.length * f2.length;
return sum >= 0.9 ? 1 : sum >= 0.1 ? 0.5f : 0;
}
2011-11-08 13:26:54 -05:00
protected String[] normalize(Object[] objects) {
// normalize objects (and make sure to keep word boundaries)
return stream(objects).map(EpisodeMetrics.this::normalizeObject).toArray(String[]::new);
2011-11-08 13:26:54 -05:00
}
protected final int MAX_FIELDS = 5;
2011-11-08 13:26:54 -05:00
protected Object[] fields(Object object) {
if (object instanceof Episode) {
Episode e = (Episode) object;
Stream<String> primaryNames = Stream.of(e.getSeriesName(), e.getTitle());
2016-10-20 15:48:33 -04:00
Stream<String> aliasNames = e.getSeriesInfo() == null ? Stream.empty() : e.getSeriesInfo().getAliasNames().stream().limit(MAX_FIELDS);
Stream<String> names = Stream.concat(primaryNames, aliasNames).filter(s -> s != null && s.length() > 0).map(Normalization::removeTrailingBrackets).distinct();
return copyOf(names.limit(MAX_FIELDS).toArray(), MAX_FIELDS);
}
if (object instanceof File) {
File f = (File) object;
return new Object[] { f, f.getParentFile().getPath() };
}
if (object instanceof Movie) {
Movie m = (Movie) object;
return new Object[] { m.getName(), m.getYear() };
}
2011-11-08 13:26:54 -05:00
return new Object[] { object };
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "SubstringFields";
}
};
// Match via common word sequence in episode name and file name
public final SimilarityMetric NameSubstringSequence = new SequenceMatchSimilarity() {
@Override
public float getSimilarity(Object o1, Object o2) {
String[] f1 = getNormalizedEffectiveIdentifiers(o1);
String[] f2 = getNormalizedEffectiveIdentifiers(o2);
// match all fields and average similarity
float max = 0;
for (String s1 : f1) {
for (String s2 : f2) {
2016-03-20 16:25:59 -04:00
max = Math.max(super.getSimilarity(s1, s2), max);
}
}
2013-01-30 19:39:47 -05:00
// normalize absolute similarity to similarity rank (4 ranks in total),
// so we are less likely to fall for false positives in this pass, and move on to the next one
2016-03-20 16:25:59 -04:00
return (float) (Math.floor(max * 4) / 4);
}
@Override
protected String normalize(Object object) {
return object.toString();
}
protected String[] getNormalizedEffectiveIdentifiers(Object object) {
2019-06-07 11:08:06 -04:00
return getEffectiveIdentifiers(object).stream().map(it -> {
return normalizeObject(it);
}).toArray(String[]::new);
}
2019-06-07 11:08:06 -04:00
protected Collection<?> getEffectiveIdentifiers(Object object) {
if (object instanceof Episode) {
return ((Episode) object).getSeriesNames();
} else if (object instanceof Movie) {
return ((Movie) object).getEffectiveNames();
} else if (object instanceof File) {
return listPathTail((File) object, 3, true);
}
2019-06-07 11:08:06 -04:00
return singleton(object);
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "NameSubstringSequence";
}
};
// Match by generic name similarity (round rank)
public final SimilarityMetric Name = new NameSimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
// normalize absolute similarity to similarity rank (4 ranks in total),
// so we are less likely to fall for false positives in this pass, and move on to the next one
2016-03-20 16:25:59 -04:00
return (float) (Math.floor(super.getSimilarity(o1, o2) * 4) / 4);
}
@Override
protected String normalize(Object object) {
// simplify file name, if possible
2011-11-08 13:26:54 -05:00
return normalizeObject(object);
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "Name";
}
};
// Match by generic name similarity (absolute)
public final SimilarityMetric SeriesName = new NameSimilarityMetric() {
2016-02-10 13:32:39 -05:00
private final SeriesNameMatcher seriesNameMatcher = getSeriesNameMatcher(false);
@Override
public float getSimilarity(Object o1, Object o2) {
2013-12-27 17:49:56 -05:00
String[] f1 = getNormalizedEffectiveIdentifiers(o1);
String[] f2 = getNormalizedEffectiveIdentifiers(o2);
2013-12-27 17:49:56 -05:00
// match all fields and average similarity
float max = 0;
for (String s1 : f1) {
for (String s2 : f2) {
2016-03-20 16:25:59 -04:00
max = Math.max(super.getSimilarity(s1, s2), max);
2013-12-27 17:49:56 -05:00
}
}
// normalize absolute similarity to similarity rank (4 ranks in total),
// so we are less likely to fall for false positives in this pass, and move on to the next one
2016-03-20 16:25:59 -04:00
return (float) (Math.floor(max * 4) / 4);
2013-12-27 17:49:56 -05:00
}
@Override
protected String normalize(Object object) {
return object.toString();
2013-12-27 17:49:56 -05:00
}
protected String[] getNormalizedEffectiveIdentifiers(Object object) {
return getEffectiveIdentifiers(object).stream().map(EpisodeMetrics.this::normalizeObject).toArray(String[]::new);
2013-12-27 17:49:56 -05:00
}
protected List<?> getEffectiveIdentifiers(Object object) {
if (object instanceof Episode) {
Episode episode = (Episode) object;
// strip release info from known series name to make sure it matches the stripped filename
return stripReleaseInfo(episode.getSeriesNames(), true);
} else if (object instanceof File) {
File file = (File) object;
// guess potential series names from path
return listPathTail(file, 3, true).stream().map(f -> {
2013-12-27 17:49:56 -05:00
String fn = getName(f);
String sn = seriesNameMatcher.matchByEpisodeIdentifier(fn);
return sn != null ? sn : fn;
}).collect(collectingAndThen(toList(), v -> stripReleaseInfo(v, true)));
2013-12-27 17:49:56 -05:00
}
2013-12-27 17:49:56 -05:00
return emptyList();
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "SeriesName";
}
};
// Cascade of the name based metrics NameSubstringSequence, Name and SeriesName (in that order)
public final SimilarityMetric SeriesNameBalancer = new MetricCascade(NameSubstringSequence, Name, SeriesName);
// Match by generic name similarity (absolute)
public final SimilarityMetric FilePath = new NameSimilarityMetric() {
@Override
protected String normalize(Object object) {
if (object instanceof File) {
object = normalizePathSeparators(getRelativePathTail((File) object, 3).getPath());
}
return normalizeObject(object.toString()); // simplify file name, if possible
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "FilePath";
}
};
public final SimilarityMetric FilePathBalancer = new NameSimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
String s1 = normalizeObject(o1);
String s2 = normalizeObject(o2);
s1 = stripReleaseInfo(s1, false);
s2 = stripReleaseInfo(s2, false);
2016-03-20 16:25:59 -04:00
int length = Math.min(s1.length(), s2.length());
s1 = s1.substring(0, length);
s2 = s2.substring(0, length);
2016-03-20 16:25:59 -04:00
return (float) (Math.floor(super.getSimilarity(s1, s2) * 4) / 4);
};
@Override
protected String normalize(Object object) {
return object.toString();
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "FilePathBalancer";
}
};
public final SimilarityMetric NumericSequence = new SequenceMatchSimilarity() {
@Override
public float getSimilarity(Object o1, Object o2) {
float lowerBound = super.getSimilarity(normalize(o1, true), normalize(o2, true));
float upperBound = super.getSimilarity(normalize(o1, false), normalize(o2, false));
2016-03-20 16:25:59 -04:00
return Math.max(lowerBound, upperBound);
};
@Override
protected String normalize(Object object) {
return object.toString();
};
protected String normalize(Object object, boolean numbersOnly) {
if (object instanceof Episode) {
Episode e = (Episode) object;
if (numbersOnly) {
object = EpisodeFormat.SeasonEpisode.formatSxE(e);
} else {
object = String.format("%s %s", e.getSeriesName(), EpisodeFormat.SeasonEpisode.formatSxE(e));
}
} else if (object instanceof Movie) {
Movie m = (Movie) object;
if (numbersOnly) {
object = m.getYear();
} else {
object = String.format("%s %s", m.getName(), m.getYear());
}
}
// simplify file name if possible and extract numbers
2016-01-09 23:54:35 -05:00
List<Integer> numbers = matchIntegers(normalizeObject(object));
return join(numbers, " ");
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "NumericSequence";
}
};
// Match by generic numeric similarity
public final SimilarityMetric Numeric = new NumericSimilarityMetric() {
@Override
2011-12-07 02:08:04 -05:00
public float getSimilarity(Object o1, Object o2) {
String[] f1 = fields(o1);
String[] f2 = fields(o2);
2011-12-07 02:08:04 -05:00
// match all fields and average similarity
float max = 0;
2011-12-07 02:08:04 -05:00
for (String s1 : f1) {
for (String s2 : f2) {
if (s1 != null && s2 != null) {
2016-03-20 16:25:59 -04:00
max = Math.max(super.getSimilarity(s1, s2), max);
2016-03-13 13:35:31 -04:00
if (max >= 1) {
return max;
}
}
2011-12-07 02:08:04 -05:00
}
}
return max;
2011-12-07 02:08:04 -05:00
}
2011-12-07 02:08:04 -05:00
protected String[] fields(Object object) {
if (object instanceof Episode) {
Episode episode = (Episode) object;
2016-03-13 13:35:31 -04:00
String[] f = new String[3];
f[0] = episode.getSeriesName();
2016-03-13 13:35:31 -04:00
f[1] = episode.getSpecial() == null ? EpisodeFormat.SeasonEpisode.formatSxE(episode) : episode.getSpecial().toString();
f[2] = episode.getAbsolute() == null ? null : episode.getAbsolute().toString();
return f;
2011-12-07 02:08:04 -05:00
}
2011-12-07 02:08:04 -05:00
if (object instanceof Movie) {
Movie movie = (Movie) object;
return new String[] { movie.getName(), String.valueOf(movie.getYear()) };
2011-12-07 02:08:04 -05:00
}
2016-03-13 13:35:31 -04:00
return new String[] { normalizeObject(object) };
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "Numeric";
}
};
// Prioritize proper episodes over specials
public final SimilarityMetric SpecialNumber = new SimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
return getSpecialFactor(o1) + getSpecialFactor(o2);
}
public int getSpecialFactor(Object object) {
if (object instanceof Episode) {
Episode episode = (Episode) object;
return episode.getSpecial() != null ? -1 : 1;
}
return 0;
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "SpecialNumber";
}
};
// Match by file length (only works when matching torrents or files)
public final SimilarityMetric FileSize = new FileSizeMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
// order of arguments is logically irrelevant, but we might be able to save us a call to File.length() which is quite costly
return o1 instanceof File ? super.getSimilarity(o2, o1) : super.getSimilarity(o1, o2);
}
@Override
protected long getLength(Object object) {
if (object instanceof FileInfo) {
return ((FileInfo) object).getLength();
}
return super.getLength(object);
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "FileSize";
}
};
// Match by common words at the beginning of both files
public final SimilarityMetric FileName = new FileNameMetric() {
@Override
protected String getFileName(Object object) {
if (object instanceof File || object instanceof FileInfo) {
return normalizeObject(object);
}
return null;
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "FileName";
}
};
// Match by file last modified and episode release dates
public final TimeStampMetric TimeStamp = new TimeStampMetric(10, ChronoUnit.YEARS) {
private final Map<File, Long> cache = synchronizedMap(new HashMap<>());
@Override
public float getSimilarity(Object o1, Object o2) {
// adjust differentiation accuracy to about 2.5 years
float f = super.getSimilarity(o1, o2);
return f >= 0.75 ? 1 : f >= 0 ? 0 : -1;
}
private long getTimeStamp(SimpleDate date) {
// some episodes may not have a defined airdate
if (date != null) {
Instant t = date.toInstant();
if (t.isBefore(Instant.now())) {
return t.toEpochMilli();
}
}
// big penalty for episodes not yet aired
return -1;
}
private long getTimeStamp(File file) {
return cache.computeIfAbsent(file, f -> {
if (MediaCharacteristicsParser.DEFAULT.acceptVideoFile(f)) {
try (MediaCharacteristics mi = MediaCharacteristicsParser.DEFAULT.open(file)) {
Instant t = mi.getCreationTime();
if (t != null) {
return t.toEpochMilli();
}
} catch (Exception e) {
debug.warning("Failed to read media encoding date: " + e.getMessage());
}
}
return super.getTimeStamp(file); // default to file creation date
});
}
@Override
public long getTimeStamp(Object object) {
if (object instanceof Episode) {
Episode e = (Episode) object;
return getTimeStamp(e.getAirdate());
} else if (object instanceof Movie) {
Movie m = (Movie) object;
return getTimeStamp(new SimpleDate(m.getYear(), 1, 1));
} else if (object instanceof File) {
File file = (File) object;
return getTimeStamp(file);
}
return -1;
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "TimeStamp";
}
};
// Match by recently aired status
public final SimilarityMetric RecentlyAired = new TimeStampMetric(3, ChronoUnit.DAYS) {

	@Override
	public float getSimilarity(Object o1, Object o2) {
		// any positive similarity counts as match
		float similarity = super.getSimilarity(o1, o2);
		if (similarity > 0) {
			return 1;
		}
		return 0;
	}

	@Override
	public long getTimeStamp(Object object) {
		// reuse the TimeStamp metric for Episode and File objects, everything else has no time stamp
		boolean supported = object instanceof Episode || object instanceof File;
		return supported ? TimeStamp.getTimeStamp(object) : -1;
	}

	@Override
	public String toString() {
		return "RecentlyAired";
	}
};
public final SimilarityMetric SeriesRating = new SimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
2016-02-22 16:29:26 -05:00
float r1 = getScore(o1);
float r2 = getScore(o2);
2014-03-22 05:51:43 -04:00
if (r1 < 0 || r2 < 0)
return -1;
2016-03-20 16:25:59 -04:00
return Math.max(r1, r2);
}
2016-02-22 16:29:26 -05:00
public float getScore(Object object) {
if (object instanceof Episode) {
SeriesInfo seriesInfo = ((Episode) object).getSeriesInfo();
if (seriesInfo != null && seriesInfo.getRating() != null && seriesInfo.getRatingCount() != null) {
if (seriesInfo.getRatingCount() >= 20) {
2016-03-20 16:25:59 -04:00
return (float) Math.floor(seriesInfo.getRating() / 3); // BOOST POPULAR SHOWS and PUT INTO 3 GROUPS
}
if (seriesInfo.getRatingCount() >= 1) {
return 0; // PENALIZE SHOWS WITH FEW RATINGS
}
return -1; // BIG PENALTY FOR SHOWS WITH 0 RATINGS
}
}
return 0;
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "SeriesRating";
}
};
public final SimilarityMetric VoteRate = new SimilarityMetric() {
2016-02-22 16:29:26 -05:00
@Override
public float getSimilarity(Object o1, Object o2) {
float r1 = getScore(o1);
float r2 = getScore(o2);
2016-03-20 16:25:59 -04:00
return Math.max(r1, r2) >= 0.1 ? 1 : 0;
2016-02-22 16:29:26 -05:00
}
public float getScore(Object object) {
if (object instanceof Episode) {
SeriesInfo seriesInfo = ((Episode) object).getSeriesInfo();
if (seriesInfo != null && seriesInfo.getRating() != null && seriesInfo.getRatingCount() != null && seriesInfo.getStartDate() != null) {
long days = ChronoUnit.DAYS.between(seriesInfo.getStartDate().toLocalDate(), LocalDate.now());
if (days > 0) {
return (float) ((seriesInfo.getRatingCount().doubleValue() / days) * seriesInfo.getRating());
}
}
}
return 0;
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "VoteRate";
}
};
2016-02-22 16:29:26 -05:00
// Match by (region) or (year) hints
public final SimilarityMetric RegionHint = new SimilarityMetric() {
2016-02-10 13:32:39 -05:00
private final Pattern hint = compile("[(](\\p{Alpha}+|\\p{Digit}+)[)]$");
private final SeriesNameMatcher seriesNameMatcher = getSeriesNameMatcher(true);
@Override
public float getSimilarity(Object o1, Object o2) {
Set<String> h1 = getHint(o1);
Set<String> h2 = getHint(o2);
return h1.isEmpty() || h2.isEmpty() ? 0 : h1.containsAll(h2) || h2.containsAll(h1) ? 1 : 0;
}
public Set<String> getHint(Object o) {
if (o instanceof Episode) {
for (String sn : ((Episode) o).getSeriesNames()) {
Matcher m = hint.matcher(sn);
if (m.find()) {
return singleton(m.group(1).trim().toLowerCase());
}
}
} else if (o instanceof File) {
Set<String> h = new HashSet<String>();
for (File f : listPathTail((File) o, 3, true)) {
// try to focus on series name
2016-04-05 14:06:02 -04:00
String fn = f.getName();
String sn = seriesNameMatcher.matchByEpisodeIdentifier(fn);
String[] tokens = PUNCTUATION_OR_SPACE.split(sn != null ? sn : fn);
for (String s : tokens) {
if (s.length() > 0) {
h.add(s.trim().toLowerCase());
}
}
}
return h;
}
return emptySet();
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "RegionHint";
}
};
// Match by stored MetaAttributes if possible
public final SimilarityMetric MetaAttributes = new CrossPropertyMetric() {
@Override
protected Map<String, Object> getProperties(Object object) {
// Episode / Movie objects
if (object instanceof Episode || object instanceof Movie) {
return super.getProperties(object);
}
// deserialize MetaAttributes if enabled and available
2014-06-24 06:59:00 -04:00
if (object instanceof File) {
Object metaObject = xattr.getMetaInfo((File) object);
2014-06-24 06:59:00 -04:00
if (metaObject != null) {
return super.getProperties(metaObject);
}
}
// ignore everything else
return emptyMap();
2014-03-22 05:51:43 -04:00
}
2019-02-05 03:31:44 -05:00
@Override
public String toString() {
return "MetaAttributes";
}
};
2019-02-05 03:10:36 -05:00
// normalized string for each object that has been passed to normalizeObject (synchronized, never evicted)
protected final Map<Object, String> transformCache = synchronizedMap(new HashMap<>(64, 4));
2019-02-02 11:04:39 -05:00
// ICU transliterator applied by normalizeObject; judging by the rule IDs it converts any script to Latin,
// then Latin to ASCII, and removes diacritics (see the ICU Transliterator documentation for exact semantics)
protected final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
protected String normalizeObject(Object object) {
if (object == null) {
return "";
2019-02-05 01:28:19 -05:00
}
return transformCache.computeIfAbsent(object, o -> {
// 1. convert to string
// 2. remove checksums, any [...] or (...)
// 3. remove obvious release info
// 4. apply transliterator
// 5. remove or normalize special characters
2019-02-05 03:10:36 -05:00
return normalizePunctuation(transliterator.transform(stripFormatInfo(removeEmbeddedChecksum(normalizeFileName(o))))).toLowerCase();
});
2017-02-18 11:41:36 -05:00
}
protected String normalizeFileName(Object object) {
2017-02-18 11:41:36 -05:00
if (object instanceof File) {
return getName((File) object);
} else if (object instanceof FileInfo) {
return ((FileInfo) object).getName();
}
return object.toString();
}
// Metrics for episode <-> file matching, in the order in which they are listed here:
// SxE / airdate / title first, then names and paths, then numbers, ratings and time stamps,
// and finally file path similarity to resolve remaining collisions
public SimilarityMetric[] matchSequence() {
	return new SimilarityMetric[] {
			EpisodeFunnel,
			EpisodeBalancer,
			AirDate,
			MetaAttributes,
			SubstringFields,
			SeriesNameBalancer,
			SeriesName,
			RegionHint,
			SpecialNumber,
			Numeric,
			NumericSequence,
			SeriesRating,
			VoteRate,
			TimeStamp,
			RecentlyAired,
			FilePathBalancer,
			FilePath
	};
}
// Metrics for file <-> file matching: same as matchSequence, but starting with FileSize
// and with a FileName / EpisodeFunnel cascade in place of the plain EpisodeFunnel
public SimilarityMetric[] matchFileSequence() {
	return new SimilarityMetric[] {
			FileSize,
			new MetricCascade(FileName, EpisodeFunnel),
			EpisodeBalancer,
			AirDate,
			MetaAttributes,
			SubstringFields,
			SeriesNameBalancer,
			SeriesName,
			RegionHint,
			SpecialNumber,
			Numeric,
			NumericSequence,
			SeriesRating,
			VoteRate,
			TimeStamp,
			RecentlyAired,
			FilePathBalancer,
			FilePath
	};
}
// Metric for matching by numbers only: the SxE / airdate cascade (EpisodeIdentifier)
public SimilarityMetric numbers() {
return EpisodeIdentifier;
}
// Creates a new cascade of the FileName, SeasonEpisode, AirDate, Title and Name metrics (in that order)
public SimilarityMetric verification() {
	return new MetricCascade(
			FileName,
			SeasonEpisode,
			AirDate,
			Title,
			Name
	);
}
// Creates a new cascade of FileSize (wrapped in a MetricMin with argument 0), FileName and EpisodeIdentifier
// NOTE(review): MetricMin presumably applies 0 as lower bound to the FileSize similarity; confirm in MetricMin
public SimilarityMetric sanity() {
	MetricMin fileSize = new MetricMin(FileSize, 0);
	return new MetricCascade(fileSize, FileName, EpisodeIdentifier);
}
}