2009-07-26 12:54:24 -04:00
|
|
|
|
|
2011-11-23 05:52:46 -05:00
|
|
|
|
package net.sourceforge.filebot.similarity;
|
2009-07-26 12:54:24 -04:00
|
|
|
|
|
|
|
|
|
|
2011-10-28 02:28:19 -04:00
|
|
|
|
import static java.lang.Math.*;
|
2011-11-13 13:29:25 -05:00
|
|
|
|
import static java.util.Arrays.*;
|
|
|
|
|
import static java.util.Collections.*;
|
2011-11-21 09:51:19 -05:00
|
|
|
|
import static net.sourceforge.filebot.hash.VerificationUtilities.*;
|
2011-10-28 02:28:19 -04:00
|
|
|
|
import static net.sourceforge.tuned.FileUtilities.*;
|
2009-08-10 07:46:24 -04:00
|
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.util.Collection;
|
2011-11-13 13:29:25 -05:00
|
|
|
|
import java.util.Map;
|
|
|
|
|
import java.util.WeakHashMap;
|
2009-07-26 12:54:24 -04:00
|
|
|
|
|
|
|
|
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
2011-11-24 12:27:39 -05:00
|
|
|
|
import net.sourceforge.filebot.vfs.FileInfo;
|
2010-10-24 12:33:38 -04:00
|
|
|
|
import net.sourceforge.filebot.web.Date;
|
2009-07-26 12:54:24 -04:00
|
|
|
|
import net.sourceforge.filebot.web.Episode;
|
2011-12-07 02:08:04 -05:00
|
|
|
|
import net.sourceforge.filebot.web.EpisodeFormat;
|
2011-10-28 04:07:02 -04:00
|
|
|
|
import net.sourceforge.filebot.web.Movie;
|
2009-07-26 12:54:24 -04:00
|
|
|
|
|
|
|
|
|
|
2011-11-23 05:52:46 -05:00
|
|
|
|
public enum EpisodeMetrics implements SimilarityMetric {
|
2009-07-26 12:54:24 -04:00
|
|
|
|
|
|
|
|
|
// Match by season / episode numbers
SeasonEpisode(new SeasonEpisodeMetric() {

    // memoized parse results, kept in a synchronized WeakHashMap (initial capacity 64, load factor 4)
    private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new WeakHashMap<Object, Collection<SxE>>(64, 4));


    /**
     * Resolves the season / episode numbers used to compare the given object.
     *
     * @param object a Movie, an Episode, or anything the superclass parser accepts
     * @return an empty set for a Movie; for an Episode its SxE, plus a second SxE built from
     *         the absolute number unless that number is negative or both are equal;
     *         otherwise whatever the superclass parser returns
     */
    @Override
    protected Collection<SxE> parse(Object object) {
        // movies are never matched by season / episode numbers
        if (object instanceof Movie) {
            return emptySet();
        }

        // reuse an earlier result for an equal object, if there is one
        Collection<SxE> cached = transformCache.get(object);
        if (cached != null) {
            return cached;
        }

        Collection<SxE> parsed;
        if (!(object instanceof Episode)) {
            parsed = super.parse(object);
        } else {
            Episode episode = (Episode) object;

            // get SxE from episode, both SxE for season/episode numbering and SxE for absolute episode numbering
            SxE sxe = new SxE(episode.getSeason(), episode.getEpisode());
            SxE abs = new SxE(null, episode.getAbsolute());

            // NOTE(review): a negative abs.episode presumably means "no absolute number" -- confirm in SxE
            if (abs.episode < 0 || sxe.equals(abs)) {
                parsed = singleton(sxe);
            } else {
                parsed = asList(sxe, abs);
            }
        }

        transformCache.put(object, parsed);
        return parsed;
    }
}),
|
|
|
|
|
|
|
|
|
|
// Match episode airdate
AirDate(new DateMetric() {

    // memoized parse results (including null for "nothing parsed"), kept in a synchronized WeakHashMap
    private final Map<Object, Date> transformCache = synchronizedMap(new WeakHashMap<Object, Date>(64, 4));


    /**
     * Resolves the date used to compare the given object.
     *
     * @param object a Movie, an Episode, or anything the superclass parser accepts
     * @return null for a Movie; the episode's airdate for an Episode; otherwise the
     *         (cached) result of the superclass parser, which may be null
     */
    @Override
    public Date parse(Object object) {
        // movies are never matched by airdate
        if (object instanceof Movie) {
            return null;
        }

        if (object instanceof Episode) {
            // use airdate from episode
            return ((Episode) object).airdate();
        }

        // A null result is a legitimate cached value, so a plain get() cannot distinguish
        // "cached as null" from "not cached". Check both under the map's own lock (the mutex
        // used by Collections.synchronizedMap). get() runs first so that an entry cleared by
        // the garbage collector in between is simply treated as a miss.
        synchronized (transformCache) {
            Date cached = transformCache.get(object);
            if (cached != null || transformCache.containsKey(object)) {
                return cached;
            }
        }

        Date result = super.parse(object);
        transformCache.put(object, result);
        return result;
    }
}),
|
|
|
|
|
|
2011-11-22 11:08:36 -05:00
|
|
|
|
// Match by episode/movie title
|
|
|
|
|
Title(new SubstringMetric() {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
protected String normalize(Object object) {
|
|
|
|
|
if (object instanceof Episode) {
|
|
|
|
|
object = ((Episode) object).getTitle();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (object instanceof Movie) {
|
|
|
|
|
object = ((Movie) object).getName();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return normalizeObject(object);
|
|
|
|
|
}
|
|
|
|
|
}),
|
|
|
|
|
|
2011-11-22 13:26:50 -05:00
|
|
|
|
// Match by SxE and airdate
|
|
|
|
|
// MetricCascade over the SeasonEpisode and AirDate metrics, in that order (how the cascade
// combines them is defined by MetricCascade, not shown here); also used by EpisodeBalancer
EpisodeIdentifier(new MetricCascade(SeasonEpisode, AirDate)),
|
|
|
|
|
|
2011-11-27 09:35:53 -05:00
|
|
|
|
// Advanced episode <-> file matching
|
2011-11-22 13:26:50 -05:00
|
|
|
|
// MetricCascade over SeasonEpisode, AirDate and Title, in that order; used as a pass in defaultSequence()
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)),
|
|
|
|
|
EpisodeBalancer(new SimilarityMetric() {

    /**
     * Combines the EpisodeIdentifier and Title similarities into a single score:
     * the non-negative identifier score multiplied by the title score, plus a tenth
     * of the identifier score rounded down.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        float identifierScore = EpisodeIdentifier.getSimilarity(o1, o2);
        float titleScore = Title.getSimilarity(o1, o2);

        // 1:SxE && Title, 2:SxE
        double weightedTitle = max(identifierScore, 0) * titleScore;
        double identifierBonus = floor(identifierScore) / 10;

        return (float) (weightedTitle + identifierBonus);
    }
}),
|
2011-11-22 11:08:36 -05:00
|
|
|
|
|
2011-10-28 02:28:19 -04:00
|
|
|
|
// Match series title and episode title against folder structure and file name
SubstringFields(new SubstringMetric() {

    /**
     * Compares every field of o1 with every field of o2 using the superclass metric,
     * averages the results and rounds the average up to the next multiple of 1/3.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        String[] f1 = normalize(fields(o1));
        String[] f2 = normalize(fields(o2));

        // match all fields and average similarity
        float sum = 0;
        for (String s1 : f1) {
            for (String s2 : f2) {
                sum += super.getSimilarity(s1, s2);
            }
        }
        sum /= f1.length * f2.length;

        // normalize into 3 similarity levels
        return (float) (ceil(sum * 3) / 3);
    }


    /**
     * Normalizes each element via normalizeObject, preserving order.
     */
    protected String[] normalize(Object[] objects) {
        String[] names = new String[objects.length];

        for (int i = 0; i < objects.length; i++) {
            names[i] = normalizeObject(objects[i]);
        }

        return names;
    }


    /**
     * Returns the values of the given object that take part in the comparison:
     * series name and title for an Episode, absolute parent path and the file itself
     * for a File, name and year for a Movie, and the object itself otherwise.
     */
    protected Object[] fields(Object object) {
        if (object instanceof Episode) {
            Episode episode = (Episode) object;
            return new Object[] { episode.getSeriesName(), episode.getTitle() };
        }

        if (object instanceof File) {
            File file = (File) object;
            File parent = file.getParentFile();

            // a bare relative name or a filesystem root has no parent; match on the file alone
            // instead of failing with a NullPointerException
            if (parent == null) {
                return new Object[] { file };
            }

            return new Object[] { parent.getAbsolutePath(), file };
        }

        if (object instanceof Movie) {
            Movie movie = (Movie) object;
            return new Object[] { movie.getName(), movie.getYear() };
        }

        return new Object[] { object };
    }
}),
|
|
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
|
// Match by generic name similarity
Name(new NameSimilarityMetric() {

    /**
     * Rounds the superclass similarity down to a multiple of 1/6.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        float absoluteSimilarity = super.getSimilarity(o1, o2);

        // normalize absolute similarity to similarity rank (6 ranks in total),
        // so we are less likely to fall for false positives in this pass, and move on to the next one
        double rank = floor(absoluteSimilarity * 6);
        return (float) (rank / 6);
    }


    /**
     * Delegates to normalizeObject, which simplifies file names where possible.
     */
    @Override
    protected String normalize(Object object) {
        String simplified = normalizeObject(object);
        return simplified;
    }
}),
|
|
|
|
|
|
|
|
|
|
// Match by generic numeric similarity
Numeric(new NumericSimilarityMetric() {

    /**
     * Compares every field of o1 with every field of o2 using the superclass metric
     * and returns the average.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        String[] f1 = fields(o1);
        String[] f2 = fields(o2);

        // match all fields and average similarity
        float sum = 0;
        for (String s1 : f1) {
            for (String s2 : f2) {
                sum += super.getSimilarity(s1, s2);
            }
        }

        return sum / (f1.length * f2.length);
    }


    /**
     * Returns the strings that take part in the comparison: the formatted SxE and the
     * absolute number for an Episode, the year for a Movie, and the normalized object otherwise.
     */
    protected String[] fields(Object object) {
        if (object instanceof Episode) {
            Episode episode = (Episode) object;
            // NOTE(review): if getAbsolute() can return null, String.valueOf yields the literal "null" -- confirm that is intended
            return new String[] { EpisodeFormat.SeasonEpisode.formatSxE(episode), String.valueOf(episode.getAbsolute()) };
        }

        if (object instanceof Movie) {
            Movie movie = (Movie) object;
            return new String[] { String.valueOf(movie.getYear()) };
        }

        return new String[] { normalizeObject(object) };
    }
}),
|
|
|
|
|
|
|
|
|
|
// Match by file length (only works when matching torrents or files)
FileSize(new FileSizeMetric() {

    /**
     * Delegates to the superclass metric, passing the arguments in swapped order
     * when the first one is a File.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        // order of arguments is logically irrelevant, but we might be able to save us a call to File.length() which is quite costly
        if (o1 instanceof File) {
            return super.getSimilarity(o2, o1);
        }

        return super.getSimilarity(o1, o2);
    }


    /**
     * Reads the length from a FileInfo directly; anything else is handled by the superclass.
     */
    @Override
    protected long getLength(Object object) {
        if (!(object instanceof FileInfo)) {
            return super.getLength(object);
        }

        FileInfo info = (FileInfo) object;
        return info.getLength();
    }
}),
|
|
|
|
|
|
|
|
|
|
// Match by common words at the beginning of both files
|
|
|
|
|
FileName(new FileNameMetric() {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
protected String getFileName(Object object) {
|
|
|
|
|
if (object instanceof File || object instanceof FileInfo) {
|
|
|
|
|
return normalizeObject(object);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return null;
|
|
|
|
|
}
|
2009-07-26 12:54:24 -04:00
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// the metric implementation this constant delegates to
private final SimilarityMetric metric;


/**
 * Binds an enum constant to the metric that performs its comparisons.
 *
 * @param metric the delegate used by getSimilarity
 */
private EpisodeMetrics(SimilarityMetric metric) {
    this.metric = metric;
}
|
|
|
|
|
|
2011-12-07 00:43:56 -05:00
|
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
|
@Override
|
|
|
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
|
|
|
return metric.getSimilarity(o1, o2);
|
|
|
|
|
}
|
|
|
|
|
|
2011-12-07 00:43:56 -05:00
|
|
|
|
|
2011-11-08 13:26:54 -05:00
|
|
|
|
protected static String normalizeObject(Object object) {
|
2009-07-26 12:54:24 -04:00
|
|
|
|
String name = object.toString();
|
|
|
|
|
|
|
|
|
|
// use name without extension
|
|
|
|
|
if (object instanceof File) {
|
2011-10-28 02:28:19 -04:00
|
|
|
|
name = getName((File) object);
|
2011-11-24 12:27:39 -05:00
|
|
|
|
} else if (object instanceof FileInfo) {
|
|
|
|
|
name = ((FileInfo) object).getName();
|
2009-07-26 12:54:24 -04:00
|
|
|
|
}
|
|
|
|
|
|
2011-11-21 09:51:19 -05:00
|
|
|
|
// remove checksums, any [...] or (...)
|
|
|
|
|
name = removeEmbeddedChecksum(name);
|
2011-11-08 13:26:54 -05:00
|
|
|
|
|
|
|
|
|
// remove/normalize special characters
|
|
|
|
|
name = name.replaceAll("['`´]+", "");
|
|
|
|
|
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
|
|
|
|
|
|
|
|
|
return name.trim().toLowerCase();
|
2009-07-26 12:54:24 -04:00
|
|
|
|
}
|
|
|
|
|
|
2011-12-07 00:43:56 -05:00
|
|
|
|
|
2011-11-13 13:29:25 -05:00
|
|
|
|
public static SimilarityMetric[] defaultSequence(boolean includeFileMetrics) {
|
2009-07-26 12:54:24 -04:00
|
|
|
|
// 1. pass: match by file length (fast, but only works when matching torrents or files)
|
|
|
|
|
// 2. pass: match by season / episode numbers
|
2011-11-13 13:29:25 -05:00
|
|
|
|
// 3. pass: match by checking series / episode title against the file path
|
2011-10-28 02:28:19 -04:00
|
|
|
|
// 4. pass: match by generic name similarity (slow, but most matches will have been determined in second pass)
|
|
|
|
|
// 5. pass: match by generic numeric similarity
|
2011-11-13 13:29:25 -05:00
|
|
|
|
if (includeFileMetrics) {
|
2011-11-24 12:27:39 -05:00
|
|
|
|
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, Name, Numeric };
|
2011-11-13 13:29:25 -05:00
|
|
|
|
} else {
|
2011-11-22 13:26:50 -05:00
|
|
|
|
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, Name, Numeric };
|
2011-11-13 13:29:25 -05:00
|
|
|
|
}
|
2009-07-26 12:54:24 -04:00
|
|
|
|
}
|
|
|
|
|
|
2011-12-07 00:43:56 -05:00
|
|
|
|
|
2011-11-27 09:35:53 -05:00
|
|
|
|
public static SimilarityMetric verificationMetric() {
|
|
|
|
|
return new MetricCascade(FileSize, FileName, SeasonEpisode, AirDate, Title, Name);
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
|
}
|