868 lines
25 KiB
Java
868 lines
25 KiB
Java
package net.filebot.similarity;
|
|
|
|
import static java.util.Arrays.*;
|
|
import static java.util.Collections.*;
|
|
import static java.util.regex.Pattern.*;
|
|
import static java.util.stream.Collectors.*;
|
|
import static net.filebot.Logging.*;
|
|
import static net.filebot.media.MediaDetection.*;
|
|
import static net.filebot.media.XattrMetaInfo.*;
|
|
import static net.filebot.similarity.Normalization.*;
|
|
import static net.filebot.util.FileUtilities.*;
|
|
import static net.filebot.util.StringUtilities.*;
|
|
|
|
import java.io.File;
|
|
import java.time.Instant;
|
|
import java.time.LocalDate;
|
|
import java.time.temporal.ChronoUnit;
|
|
import java.util.Collection;
|
|
import java.util.HashMap;
|
|
import java.util.HashSet;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Optional;
|
|
import java.util.Set;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
import java.util.stream.Stream;
|
|
|
|
import com.ibm.icu.text.Transliterator;
|
|
|
|
import net.filebot.media.MediaCharacteristics;
|
|
import net.filebot.media.MediaCharacteristicsParser;
|
|
import net.filebot.media.SmartSeasonEpisodeMatcher;
|
|
import net.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
|
import net.filebot.vfs.FileInfo;
|
|
import net.filebot.web.Episode;
|
|
import net.filebot.web.EpisodeFormat;
|
|
import net.filebot.web.Movie;
|
|
import net.filebot.web.SeriesInfo;
|
|
import net.filebot.web.SimpleDate;
|
|
|
|
public class EpisodeMetrics {
|
|
|
|
// Match by season / episode numbers
|
|
public final SimilarityMetric SeasonEpisode = new SeasonEpisodeMetric(new SmartSeasonEpisodeMatcher(null, false)) {
|
|
|
|
private final Map<Object, Collection<SxE>> cache = synchronizedMap(new HashMap<>(64, 4));
|
|
|
|
@Override
|
|
protected Collection<SxE> parse(Object object) {
|
|
// SxE sets for Episode objects cannot be cached because the same Episode (by ID) may have different episode numbers depending on the order (e.g. Airdate VS DVD order)
|
|
if (object instanceof Episode) {
|
|
Episode episode = (Episode) object;
|
|
return parse(episode);
|
|
}
|
|
|
|
if (object instanceof Movie) {
|
|
return emptySet();
|
|
}
|
|
|
|
return cache.computeIfAbsent(object, o -> {
|
|
Collection<SxE> sxe = super.parse(o);
|
|
return sxe == null ? emptySet() : sxe;
|
|
});
|
|
}
|
|
|
|
private Set<SxE> parse(Episode e) {
|
|
// get SxE from episode, both SxE for season/episode numbering and SxE for absolute episode numbering
|
|
Set<SxE> sxe = new HashSet<SxE>(2);
|
|
|
|
// default SxE numbering
|
|
if (e.getEpisode() != null) {
|
|
sxe.add(new SxE(e.getSeason(), e.getEpisode()));
|
|
|
|
// absolute numbering
|
|
if (e.getAbsolute() != null) {
|
|
sxe.add(new SxE(null, e.getAbsolute()));
|
|
}
|
|
} else {
|
|
// 0xSpecial numbering
|
|
if (e.getSpecial() != null) {
|
|
sxe.add(new SxE(0, e.getSpecial()));
|
|
}
|
|
}
|
|
|
|
return sxe;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "SeasonEpisode";
|
|
}
|
|
};
|
|
|
|
// Match episode airdate
|
|
public final SimilarityMetric AirDate = new DateMetric(getDateMatcher()) {
|
|
|
|
private final Map<Object, Optional<SimpleDate>> cache = synchronizedMap(new HashMap<>(64, 4));
|
|
|
|
@Override
|
|
public SimpleDate parse(Object object) {
|
|
if (object instanceof Episode) {
|
|
Episode episode = (Episode) object;
|
|
return episode.getAirdate();
|
|
}
|
|
|
|
if (object instanceof Movie) {
|
|
return null;
|
|
}
|
|
|
|
return cache.computeIfAbsent(object, o -> {
|
|
return Optional.ofNullable(super.parse(o));
|
|
}).orElse(null);
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "AirDate";
|
|
}
|
|
};
|
|
|
|
// Match by episode/movie title
|
|
public final SimilarityMetric Title = new SubstringMetric() {
|
|
|
|
@Override
|
|
protected String normalize(Object object) {
|
|
if (object instanceof Episode) {
|
|
Episode e = (Episode) object;
|
|
|
|
// don't use title for matching if title equals series name
|
|
if (e.getTitle() != null) {
|
|
String title = normalizeObject(removeTrailingBrackets(e.getTitle()));
|
|
if (title.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(title)) {
|
|
return title;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (object instanceof Movie) {
|
|
return normalizeObject(((Movie) object).getName());
|
|
}
|
|
|
|
String s = normalizeObject(object);
|
|
return s.length() >= 4 ? s : null; // only consider long enough strings to avoid false matches
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "Title";
|
|
}
|
|
};
|
|
|
|
// Match by SxE and airdate
|
|
public final SimilarityMetric EpisodeIdentifier = new MetricCascade(SeasonEpisode, AirDate);
|
|
|
|
// Advanced episode <-> file matching Lv1
|
|
public final SimilarityMetric EpisodeFunnel = new MetricCascade(SeasonEpisode, AirDate, Title);
|
|
|
|
// Advanced episode <-> file matching Lv2
|
|
public final SimilarityMetric EpisodeBalancer = new SimilarityMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
float sxe = EpisodeIdentifier.getSimilarity(o1, o2);
|
|
float title = sxe < 1 ? Title.getSimilarity(o1, o2) : 1; // if SxE matches then boost score as if it was a title match as well
|
|
|
|
// account for misleading SxE patterns in the episode title
|
|
if (sxe < 0 && title == 1 && EpisodeIdentifier.getSimilarity(getTitle(o1), getTitle(o2)) == 1) {
|
|
sxe = 1;
|
|
title = 0;
|
|
}
|
|
|
|
// allow title to override SxE only if series name also is a good match
|
|
if (title == 1 && SeriesName.getSimilarity(o1, o2) < 0.5f) {
|
|
title = 0;
|
|
}
|
|
|
|
// 1:SxE && Title, 2:SxE
|
|
return (float) ((Math.max(sxe, 0) * title) + (Math.floor(sxe) / 10));
|
|
}
|
|
|
|
public Object getTitle(Object o) {
|
|
if (o instanceof Episode) {
|
|
Episode e = (Episode) o;
|
|
return e.getSeriesName() + " " + e.getTitle();
|
|
}
|
|
return o;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "EpisodeBalancer";
|
|
}
|
|
};
|
|
|
|
// Match series title and episode title against folder structure and file name
|
|
public final SimilarityMetric SubstringFields = new SubstringMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
String[] f1 = normalize(fields(o1));
|
|
String[] f2 = normalize(fields(o2));
|
|
|
|
// match all fields and average similarity
|
|
double sum = 0;
|
|
for (int i = 0; i < f1.length; i++) {
|
|
for (int j = 0; j < f2.length; j++) {
|
|
float f = super.getSimilarity(f1[i], f2[j]);
|
|
if (f > 0) {
|
|
// 2-sqrt(x) from 0 to 1
|
|
double multiplier = 2 - Math.sqrt((double) (i + j) / (f1.length + f2.length));
|
|
|
|
// bonus points for primary matches (e.g. primary title matches filename > alias title matches folder path)
|
|
sum += f * multiplier;
|
|
}
|
|
}
|
|
}
|
|
sum /= f1.length * f2.length;
|
|
|
|
return sum >= 0.9 ? 1 : sum >= 0.1 ? 0.5f : 0;
|
|
}
|
|
|
|
protected String[] normalize(Object[] objects) {
|
|
// normalize objects (and make sure to keep word boundaries)
|
|
return stream(objects).map(EpisodeMetrics.this::normalizeObject).toArray(String[]::new);
|
|
}
|
|
|
|
protected final int MAX_FIELDS = 5;
|
|
|
|
protected Object[] fields(Object object) {
|
|
if (object instanceof Episode) {
|
|
Episode e = (Episode) object;
|
|
|
|
Stream<String> primaryNames = Stream.of(e.getSeriesName(), e.getTitle());
|
|
Stream<String> aliasNames = e.getSeriesInfo() == null ? Stream.empty() : e.getSeriesInfo().getAliasNames().stream().limit(MAX_FIELDS);
|
|
|
|
Stream<String> names = Stream.concat(primaryNames, aliasNames).filter(s -> s != null && s.length() > 0).map(Normalization::removeTrailingBrackets).distinct();
|
|
return copyOf(names.limit(MAX_FIELDS).toArray(), MAX_FIELDS);
|
|
}
|
|
|
|
if (object instanceof File) {
|
|
File f = (File) object;
|
|
return new Object[] { f, f.getParentFile().getPath() };
|
|
}
|
|
|
|
if (object instanceof Movie) {
|
|
Movie m = (Movie) object;
|
|
return new Object[] { m.getName(), m.getYear() };
|
|
}
|
|
|
|
return new Object[] { object };
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "SubstringFields";
|
|
}
|
|
};
|
|
|
|
// Match via common word sequence in episode name and file name
|
|
public final SimilarityMetric NameSubstringSequence = new SequenceMatchSimilarity() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
String[] f1 = getNormalizedEffectiveIdentifiers(o1);
|
|
String[] f2 = getNormalizedEffectiveIdentifiers(o2);
|
|
|
|
// match all fields and average similarity
|
|
float max = 0;
|
|
for (String s1 : f1) {
|
|
for (String s2 : f2) {
|
|
max = Math.max(super.getSimilarity(s1, s2), max);
|
|
}
|
|
}
|
|
|
|
// normalize absolute similarity to similarity rank (4 ranks in total),
|
|
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
|
return (float) (Math.floor(max * 4) / 4);
|
|
}
|
|
|
|
@Override
|
|
protected String normalize(Object object) {
|
|
return object.toString();
|
|
}
|
|
|
|
protected String[] getNormalizedEffectiveIdentifiers(Object object) {
|
|
return getEffectiveIdentifiers(object).stream().map(it -> {
|
|
return normalizeObject(it);
|
|
}).toArray(String[]::new);
|
|
}
|
|
|
|
protected Collection<?> getEffectiveIdentifiers(Object object) {
|
|
if (object instanceof Episode) {
|
|
return ((Episode) object).getSeriesNames();
|
|
} else if (object instanceof Movie) {
|
|
return ((Movie) object).getEffectiveNames();
|
|
} else if (object instanceof File) {
|
|
return listPathTail((File) object, 3, true);
|
|
}
|
|
return singleton(object);
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "NameSubstringSequence";
|
|
}
|
|
};
|
|
|
|
// Match by generic name similarity (round rank)
|
|
public final SimilarityMetric Name = new NameSimilarityMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
// normalize absolute similarity to similarity rank (4 ranks in total),
|
|
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
|
return (float) (Math.floor(super.getSimilarity(o1, o2) * 4) / 4);
|
|
}
|
|
|
|
@Override
|
|
protected String normalize(Object object) {
|
|
// simplify file name, if possible
|
|
return normalizeObject(object);
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "Name";
|
|
}
|
|
};
|
|
|
|
// Match by generic name similarity (absolute)
|
|
public final SimilarityMetric SeriesName = new NameSimilarityMetric() {
|
|
|
|
private final SeriesNameMatcher seriesNameMatcher = getSeriesNameMatcher(false);
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
String[] f1 = getNormalizedEffectiveIdentifiers(o1);
|
|
String[] f2 = getNormalizedEffectiveIdentifiers(o2);
|
|
|
|
// match all fields and average similarity
|
|
float max = 0;
|
|
for (String s1 : f1) {
|
|
for (String s2 : f2) {
|
|
max = Math.max(super.getSimilarity(s1, s2), max);
|
|
}
|
|
}
|
|
|
|
// normalize absolute similarity to similarity rank (4 ranks in total),
|
|
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
|
return (float) (Math.floor(max * 4) / 4);
|
|
}
|
|
|
|
@Override
|
|
protected String normalize(Object object) {
|
|
return object.toString();
|
|
}
|
|
|
|
protected String[] getNormalizedEffectiveIdentifiers(Object object) {
|
|
return getEffectiveIdentifiers(object).stream().map(EpisodeMetrics.this::normalizeObject).toArray(String[]::new);
|
|
}
|
|
|
|
protected List<?> getEffectiveIdentifiers(Object object) {
|
|
if (object instanceof Episode) {
|
|
Episode episode = (Episode) object;
|
|
|
|
// strip release info from known series name to make sure it matches the stripped filename
|
|
return stripReleaseInfo(episode.getSeriesNames(), true);
|
|
} else if (object instanceof File) {
|
|
File file = (File) object;
|
|
|
|
// guess potential series names from path
|
|
return listPathTail(file, 3, true).stream().map(f -> {
|
|
String fn = getName(f);
|
|
String sn = seriesNameMatcher.matchByEpisodeIdentifier(fn);
|
|
return sn != null ? sn : fn;
|
|
}).collect(collectingAndThen(toList(), v -> stripReleaseInfo(v, true)));
|
|
}
|
|
|
|
return emptyList();
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "SeriesName";
|
|
}
|
|
};
|
|
|
|
/**
 * Cascade of {@link #NameSubstringSequence}, {@link #Name} and {@link #SeriesName}.
 */
public final SimilarityMetric SeriesNameBalancer = new MetricCascade(
		NameSubstringSequence,
		Name,
		SeriesName);
|
|
|
|
// Match by generic name similarity (absolute)
|
|
public final SimilarityMetric FilePath = new NameSimilarityMetric() {
|
|
|
|
@Override
|
|
protected String normalize(Object object) {
|
|
if (object instanceof File) {
|
|
object = normalizePathSeparators(getRelativePathTail((File) object, 3).getPath());
|
|
}
|
|
return normalizeObject(object.toString()); // simplify file name, if possible
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "FilePath";
|
|
}
|
|
};
|
|
|
|
public final SimilarityMetric FilePathBalancer = new NameSimilarityMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
String s1 = normalizeObject(o1);
|
|
String s2 = normalizeObject(o2);
|
|
|
|
s1 = stripReleaseInfo(s1, false);
|
|
s2 = stripReleaseInfo(s2, false);
|
|
|
|
int length = Math.min(s1.length(), s2.length());
|
|
s1 = s1.substring(0, length);
|
|
s2 = s2.substring(0, length);
|
|
|
|
return (float) (Math.floor(super.getSimilarity(s1, s2) * 4) / 4);
|
|
};
|
|
|
|
@Override
|
|
protected String normalize(Object object) {
|
|
return object.toString();
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "FilePathBalancer";
|
|
}
|
|
};
|
|
|
|
public final SimilarityMetric NumericSequence = new SequenceMatchSimilarity() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
float lowerBound = super.getSimilarity(normalize(o1, true), normalize(o2, true));
|
|
float upperBound = super.getSimilarity(normalize(o1, false), normalize(o2, false));
|
|
|
|
return Math.max(lowerBound, upperBound);
|
|
};
|
|
|
|
@Override
|
|
protected String normalize(Object object) {
|
|
return object.toString();
|
|
};
|
|
|
|
protected String normalize(Object object, boolean numbersOnly) {
|
|
if (object instanceof Episode) {
|
|
Episode e = (Episode) object;
|
|
if (numbersOnly) {
|
|
object = EpisodeFormat.SeasonEpisode.formatSxE(e);
|
|
} else {
|
|
object = String.format("%s %s", e.getSeriesName(), EpisodeFormat.SeasonEpisode.formatSxE(e));
|
|
}
|
|
} else if (object instanceof Movie) {
|
|
Movie m = (Movie) object;
|
|
if (numbersOnly) {
|
|
object = m.getYear();
|
|
} else {
|
|
object = String.format("%s %s", m.getName(), m.getYear());
|
|
}
|
|
}
|
|
|
|
// simplify file name if possible and extract numbers
|
|
List<Integer> numbers = matchIntegers(normalizeObject(object));
|
|
return join(numbers, " ");
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "NumericSequence";
|
|
}
|
|
};
|
|
|
|
// Match by generic numeric similarity
|
|
public final SimilarityMetric Numeric = new NumericSimilarityMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
String[] f1 = fields(o1);
|
|
String[] f2 = fields(o2);
|
|
|
|
// match all fields and average similarity
|
|
float max = 0;
|
|
for (String s1 : f1) {
|
|
for (String s2 : f2) {
|
|
if (s1 != null && s2 != null) {
|
|
max = Math.max(super.getSimilarity(s1, s2), max);
|
|
if (max >= 1) {
|
|
return max;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return max;
|
|
}
|
|
|
|
protected String[] fields(Object object) {
|
|
if (object instanceof Episode) {
|
|
Episode episode = (Episode) object;
|
|
String[] f = new String[3];
|
|
f[0] = episode.getSeriesName();
|
|
f[1] = episode.getSpecial() == null ? EpisodeFormat.SeasonEpisode.formatSxE(episode) : episode.getSpecial().toString();
|
|
f[2] = episode.getAbsolute() == null ? null : episode.getAbsolute().toString();
|
|
return f;
|
|
}
|
|
|
|
if (object instanceof Movie) {
|
|
Movie movie = (Movie) object;
|
|
return new String[] { movie.getName(), String.valueOf(movie.getYear()) };
|
|
}
|
|
|
|
return new String[] { normalizeObject(object) };
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "Numeric";
|
|
}
|
|
};
|
|
|
|
// Prioritize proper episodes over specials
|
|
public final SimilarityMetric SpecialNumber = new SimilarityMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
return getSpecialFactor(o1) + getSpecialFactor(o2);
|
|
}
|
|
|
|
public int getSpecialFactor(Object object) {
|
|
if (object instanceof Episode) {
|
|
Episode episode = (Episode) object;
|
|
return episode.getSpecial() != null ? -1 : 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "SpecialNumber";
|
|
}
|
|
};
|
|
|
|
// Match by file length (only works when matching torrents or files)
|
|
public final SimilarityMetric FileSize = new FileSizeMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
// order of arguments is logically irrelevant, but we might be able to save us a call to File.length() which is quite costly
|
|
return o1 instanceof File ? super.getSimilarity(o2, o1) : super.getSimilarity(o1, o2);
|
|
}
|
|
|
|
@Override
|
|
protected long getLength(Object object) {
|
|
if (object instanceof FileInfo) {
|
|
return ((FileInfo) object).getLength();
|
|
}
|
|
return super.getLength(object);
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "FileSize";
|
|
}
|
|
};
|
|
|
|
// Match by common words at the beginning of both files
|
|
public final SimilarityMetric FileName = new FileNameMetric() {
|
|
|
|
@Override
|
|
protected String getFileName(Object object) {
|
|
if (object instanceof File || object instanceof FileInfo) {
|
|
return normalizeObject(object);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "FileName";
|
|
}
|
|
};
|
|
|
|
// Match by file last modified and episode release dates
|
|
public final TimeStampMetric TimeStamp = new TimeStampMetric(10, ChronoUnit.YEARS) {
|
|
|
|
private final Map<File, Long> cache = synchronizedMap(new HashMap<>());
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
// adjust differentiation accuracy to about 2.5 years
|
|
float f = super.getSimilarity(o1, o2);
|
|
|
|
return f >= 0.75 ? 1 : f >= 0 ? 0 : -1;
|
|
}
|
|
|
|
private long getTimeStamp(SimpleDate date) {
|
|
// some episodes may not have a defined airdate
|
|
if (date != null) {
|
|
Instant t = date.toInstant();
|
|
if (t.isBefore(Instant.now())) {
|
|
return t.toEpochMilli();
|
|
}
|
|
}
|
|
|
|
// big penalty for episodes not yet aired
|
|
return -1;
|
|
}
|
|
|
|
private long getTimeStamp(File file) {
|
|
return cache.computeIfAbsent(file, f -> {
|
|
if (MediaCharacteristicsParser.DEFAULT.acceptVideoFile(f)) {
|
|
try (MediaCharacteristics mi = MediaCharacteristicsParser.DEFAULT.open(file)) {
|
|
Instant t = mi.getCreationTime();
|
|
if (t != null) {
|
|
return t.toEpochMilli();
|
|
}
|
|
} catch (Exception e) {
|
|
debug.warning("Failed to read media encoding date: " + e.getMessage());
|
|
}
|
|
}
|
|
|
|
return super.getTimeStamp(file); // default to file creation date
|
|
});
|
|
}
|
|
|
|
@Override
|
|
public long getTimeStamp(Object object) {
|
|
if (object instanceof Episode) {
|
|
Episode e = (Episode) object;
|
|
return getTimeStamp(e.getAirdate());
|
|
} else if (object instanceof Movie) {
|
|
Movie m = (Movie) object;
|
|
return getTimeStamp(new SimpleDate(m.getYear(), 1, 1));
|
|
} else if (object instanceof File) {
|
|
File file = (File) object;
|
|
return getTimeStamp(file);
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "TimeStamp";
|
|
}
|
|
};
|
|
|
|
// Match by recently aired status
|
|
public final SimilarityMetric RecentlyAired = new TimeStampMetric(3, ChronoUnit.DAYS) {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
return super.getSimilarity(o1, o2) > 0 ? 1 : 0;
|
|
}
|
|
|
|
@Override
|
|
public long getTimeStamp(Object object) {
|
|
return object instanceof Episode || object instanceof File ? TimeStamp.getTimeStamp(object) : -1;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "RecentlyAired";
|
|
}
|
|
};
|
|
|
|
public final SimilarityMetric SeriesRating = new SimilarityMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
float r1 = getScore(o1);
|
|
float r2 = getScore(o2);
|
|
|
|
if (r1 < 0 || r2 < 0)
|
|
return -1;
|
|
|
|
return Math.max(r1, r2);
|
|
}
|
|
|
|
public float getScore(Object object) {
|
|
if (object instanceof Episode) {
|
|
SeriesInfo seriesInfo = ((Episode) object).getSeriesInfo();
|
|
if (seriesInfo != null && seriesInfo.getRating() != null && seriesInfo.getRatingCount() != null) {
|
|
if (seriesInfo.getRatingCount() >= 20) {
|
|
return (float) Math.floor(seriesInfo.getRating() / 3); // BOOST POPULAR SHOWS and PUT INTO 3 GROUPS
|
|
}
|
|
if (seriesInfo.getRatingCount() >= 1) {
|
|
return 0; // PENALIZE SHOWS WITH FEW RATINGS
|
|
}
|
|
return -1; // BIG PENALTY FOR SHOWS WITH 0 RATINGS
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "SeriesRating";
|
|
}
|
|
};
|
|
|
|
public final SimilarityMetric VoteRate = new SimilarityMetric() {
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
float r1 = getScore(o1);
|
|
float r2 = getScore(o2);
|
|
|
|
return Math.max(r1, r2) >= 0.1 ? 1 : 0;
|
|
}
|
|
|
|
public float getScore(Object object) {
|
|
if (object instanceof Episode) {
|
|
SeriesInfo seriesInfo = ((Episode) object).getSeriesInfo();
|
|
if (seriesInfo != null && seriesInfo.getRating() != null && seriesInfo.getRatingCount() != null && seriesInfo.getStartDate() != null) {
|
|
long days = ChronoUnit.DAYS.between(seriesInfo.getStartDate().toLocalDate(), LocalDate.now());
|
|
if (days > 0) {
|
|
return (float) ((seriesInfo.getRatingCount().doubleValue() / days) * seriesInfo.getRating());
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "VoteRate";
|
|
}
|
|
};
|
|
|
|
// Match by (region) or (year) hints
|
|
public final SimilarityMetric RegionHint = new SimilarityMetric() {
|
|
|
|
private final Pattern hint = compile("[(](\\p{Alpha}+|\\p{Digit}+)[)]$");
|
|
|
|
private final SeriesNameMatcher seriesNameMatcher = getSeriesNameMatcher(true);
|
|
|
|
@Override
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
Set<String> h1 = getHint(o1);
|
|
Set<String> h2 = getHint(o2);
|
|
|
|
return h1.isEmpty() || h2.isEmpty() ? 0 : h1.containsAll(h2) || h2.containsAll(h1) ? 1 : 0;
|
|
}
|
|
|
|
public Set<String> getHint(Object o) {
|
|
if (o instanceof Episode) {
|
|
for (String sn : ((Episode) o).getSeriesNames()) {
|
|
Matcher m = hint.matcher(sn);
|
|
if (m.find()) {
|
|
return singleton(m.group(1).trim().toLowerCase());
|
|
}
|
|
}
|
|
} else if (o instanceof File) {
|
|
Set<String> h = new HashSet<String>();
|
|
for (File f : listPathTail((File) o, 3, true)) {
|
|
// try to focus on series name
|
|
String fn = f.getName();
|
|
String sn = seriesNameMatcher.matchByEpisodeIdentifier(fn);
|
|
String[] tokens = PUNCTUATION_OR_SPACE.split(sn != null ? sn : fn);
|
|
for (String s : tokens) {
|
|
if (s.length() > 0) {
|
|
h.add(s.trim().toLowerCase());
|
|
}
|
|
}
|
|
}
|
|
return h;
|
|
}
|
|
|
|
return emptySet();
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "RegionHint";
|
|
}
|
|
};
|
|
|
|
// Match by stored MetaAttributes if possible
|
|
public final SimilarityMetric MetaAttributes = new CrossPropertyMetric() {
|
|
|
|
@Override
|
|
protected Map<String, Object> getProperties(Object object) {
|
|
// Episode / Movie objects
|
|
if (object instanceof Episode || object instanceof Movie) {
|
|
return super.getProperties(object);
|
|
}
|
|
|
|
// deserialize MetaAttributes if enabled and available
|
|
if (object instanceof File) {
|
|
Object metaObject = xattr.getMetaInfo((File) object);
|
|
if (metaObject != null) {
|
|
return super.getProperties(metaObject);
|
|
}
|
|
}
|
|
|
|
// ignore everything else
|
|
return emptyMap();
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "MetaAttributes";
|
|
}
|
|
};
|
|
|
|
// results of normalizeObject(Object) by input object
protected final Map<Object, String> transformCache = synchronizedMap(
		new HashMap<>(64, 4));
|
|
|
|
// transliterator applied by normalizeObject(Object)
protected final Transliterator transliterator = Transliterator.getInstance(
		"Any-Latin;Latin-ASCII;[:Diacritic:]remove");
|
|
|
|
protected String normalizeObject(Object object) {
|
|
if (object == null) {
|
|
return "";
|
|
}
|
|
|
|
return transformCache.computeIfAbsent(object, o -> {
|
|
// 1. convert to string
|
|
// 2. remove checksums, any [...] or (...)
|
|
// 3. remove obvious release info
|
|
// 4. apply transliterator
|
|
// 5. remove or normalize special characters
|
|
return normalizePunctuation(transliterator.transform(stripFormatInfo(removeEmbeddedChecksum(normalizeFileName(o))))).toLowerCase();
|
|
});
|
|
}
|
|
|
|
protected String normalizeFileName(Object object) {
|
|
if (object instanceof File) {
|
|
return getName((File) object);
|
|
} else if (object instanceof FileInfo) {
|
|
return ((FileInfo) object).getName();
|
|
}
|
|
return object.toString();
|
|
}
|
|
|
|
public SimilarityMetric[] matchSequence() {
|
|
// 1 pass: divide by file length (only works for matching torrent entries or files)
|
|
// 2-3 pass: divide by title or season / episode numbers
|
|
// 4 pass: divide by folder / file name and show name / episode title
|
|
// 5 pass: divide by name (rounded into n levels)
|
|
// 6 pass: divide by generic numeric similarity
|
|
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
|
// 8 pass: resolve remaining collisions via absolute string similarity
|
|
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, AirDate, MetaAttributes, SubstringFields, SeriesNameBalancer, SeriesName, RegionHint, SpecialNumber, Numeric, NumericSequence, SeriesRating, VoteRate, TimeStamp, RecentlyAired, FilePathBalancer, FilePath };
|
|
}
|
|
|
|
public SimilarityMetric[] matchFileSequence() {
|
|
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, AirDate, MetaAttributes, SubstringFields, SeriesNameBalancer, SeriesName, RegionHint, SpecialNumber, Numeric, NumericSequence, SeriesRating, VoteRate, TimeStamp, RecentlyAired, FilePathBalancer, FilePath };
|
|
}
|
|
|
|
public SimilarityMetric numbers() {
|
|
return EpisodeIdentifier;
|
|
}
|
|
|
|
public SimilarityMetric verification() {
|
|
return new MetricCascade(FileName, SeasonEpisode, AirDate, Title, Name);
|
|
}
|
|
|
|
public SimilarityMetric sanity() {
|
|
return new MetricCascade(new MetricMin(FileSize, 0), FileName, EpisodeIdentifier);
|
|
}
|
|
|
|
}
|