2011-11-23 05:52:46 -05:00
|
|
|
package net.sourceforge.filebot.similarity;
|
2009-07-26 12:54:24 -04:00
|
|
|
|
2013-09-11 13:22:00 -04:00
|
|
|
import static java.lang.Math.*;
|
|
|
|
import static java.util.Arrays.*;
|
|
|
|
import static java.util.Collections.*;
|
2013-10-29 14:34:39 -04:00
|
|
|
import static java.util.regex.Pattern.*;
|
2013-09-11 13:22:00 -04:00
|
|
|
import static net.sourceforge.filebot.Settings.*;
|
|
|
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
|
|
|
import static net.sourceforge.tuned.FileUtilities.*;
|
|
|
|
import static net.sourceforge.tuned.StringUtilities.*;
|
2009-08-10 07:46:24 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
import java.io.File;
|
2013-04-06 13:49:27 -04:00
|
|
|
import java.io.IOException;
|
2013-02-15 04:50:23 -05:00
|
|
|
import java.util.ArrayList;
|
2009-07-26 12:54:24 -04:00
|
|
|
import java.util.Collection;
|
2012-07-24 16:01:48 -04:00
|
|
|
import java.util.HashMap;
|
2013-10-29 14:34:39 -04:00
|
|
|
import java.util.HashSet;
|
2013-09-06 03:55:13 -04:00
|
|
|
import java.util.Iterator;
|
|
|
|
import java.util.LinkedHashSet;
|
2013-02-15 04:50:23 -05:00
|
|
|
import java.util.List;
|
2013-04-06 13:49:27 -04:00
|
|
|
import java.util.Locale;
|
2011-11-13 13:29:25 -05:00
|
|
|
import java.util.Map;
|
2013-04-06 13:49:27 -04:00
|
|
|
import java.util.NoSuchElementException;
|
2013-02-15 04:50:23 -05:00
|
|
|
import java.util.Scanner;
|
2013-10-29 14:34:39 -04:00
|
|
|
import java.util.Set;
|
2013-04-06 13:49:27 -04:00
|
|
|
import java.util.logging.Level;
|
|
|
|
import java.util.logging.Logger;
|
2013-10-29 14:34:39 -04:00
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
2009-07-26 12:54:24 -04:00
|
|
|
|
2013-04-06 13:49:27 -04:00
|
|
|
import net.sourceforge.filebot.WebServices;
|
2013-04-02 11:34:25 -04:00
|
|
|
import net.sourceforge.filebot.media.ReleaseInfo;
|
2009-07-26 12:54:24 -04:00
|
|
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
2011-11-24 12:27:39 -05:00
|
|
|
import net.sourceforge.filebot.vfs.FileInfo;
|
2010-10-24 12:33:38 -04:00
|
|
|
import net.sourceforge.filebot.web.Date;
|
2009-07-26 12:54:24 -04:00
|
|
|
import net.sourceforge.filebot.web.Episode;
|
2011-12-07 02:08:04 -05:00
|
|
|
import net.sourceforge.filebot.web.EpisodeFormat;
|
2011-10-28 04:07:02 -04:00
|
|
|
import net.sourceforge.filebot.web.Movie;
|
2013-04-06 13:49:27 -04:00
|
|
|
import net.sourceforge.filebot.web.TheTVDBClient.SeriesInfo;
|
2013-07-13 06:40:47 -04:00
|
|
|
import net.sourceforge.filebot.web.TheTVDBSearchResult;
|
2009-07-26 12:54:24 -04:00
|
|
|
|
2012-12-02 08:41:06 -05:00
|
|
|
import com.ibm.icu.text.Transliterator;
|
|
|
|
|
2011-11-23 05:52:46 -05:00
|
|
|
public enum EpisodeMetrics implements SimilarityMetric {
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
// Match by season / episode numbers
SeasonEpisode(new SeasonEpisodeMetric() {

    // memoized parse results, keyed by the object that was parsed
    private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new HashMap<Object, Collection<SxE>>(64, 4));

    /**
     * Resolve the season / episode identifiers of the given object.
     *
     * Movies always yield an empty set. Episodes are converted directly from their own
     * numbering, anything else is handed to the parent implementation. Results (except
     * for special episodes, which return early) are remembered in the transform cache.
     */
    @Override
    protected Collection<SxE> parse(Object object) {
        // movies never carry season / episode numbers
        if (object instanceof Movie) {
            return emptySet();
        }

        Collection<SxE> cached = transformCache.get(object);
        if (cached != null) {
            return cached;
        }

        Collection<SxE> parsed;
        if (!(object instanceof Episode)) {
            parsed = super.parse(object);
        } else {
            Episode episode = (Episode) object;

            // specials are reported as season 0 and returned right away (not cached)
            if (episode.getSpecial() != null) {
                return singleton(new SxE(0, episode.getSpecial()));
            }

            // one SxE for season/episode numbering and one SxE for absolute episode numbering
            SxE numbered = new SxE(episode.getSeason(), episode.getEpisode());
            SxE absolute = new SxE(null, episode.getAbsolute());

            // the absolute SxE is only added when its episode number is not negative and it differs from the regular one
            if (absolute.episode < 0 || numbered.equals(absolute)) {
                parsed = singleton(numbered);
            } else {
                parsed = asList(numbered, absolute);
            }
        }

        transformCache.put(object, parsed);
        return parsed;
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2010-10-24 12:33:38 -04:00
|
|
|
// Match episode airdate
AirDate(new DateMetric() {

    // memoized parse results for non-episode objects; a stored null means "parsed, no date found"
    private final Map<Object, Date> transformCache = synchronizedMap(new HashMap<Object, Date>(64, 4));

    /**
     * Resolve the date that is associated with the given object.
     *
     * @param object the object to inspect
     * @return null for movies, the airdate for episodes (as returned by the episode,
     *         so possibly null), otherwise the (cached) result of the parent implementation
     */
    @Override
    public Date parse(Object object) {
        if (object instanceof Movie) {
            return null;
        }

        if (object instanceof Episode) {
            Episode episode = (Episode) object;

            // use airdate from episode
            return episode.getAirdate();
        }

        Date result = transformCache.get(object);

        // also honor cached null results, so that objects without any date are not parsed again on every call
        if (result != null || transformCache.containsKey(object)) {
            return result;
        }

        result = super.parse(object);
        transformCache.put(object, result);
        return result;
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-22 11:08:36 -05:00
|
|
|
// Match by episode/movie title
|
|
|
|
Title(new SubstringMetric() {
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-22 11:08:36 -05:00
|
|
|
@Override
|
|
|
|
protected String normalize(Object object) {
|
|
|
|
if (object instanceof Episode) {
|
2012-02-08 08:16:41 -05:00
|
|
|
Episode e = (Episode) object;
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-02-08 08:16:41 -05:00
|
|
|
// don't use title for matching if title equals series name
|
2013-11-07 02:45:30 -05:00
|
|
|
String normalizedToken = normalizeObject(removeTrailingBrackets(e.getTitle()));
|
2013-09-27 04:50:41 -04:00
|
|
|
if (normalizedToken.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(normalizedToken)) {
|
2012-03-20 14:18:34 -04:00
|
|
|
return normalizedToken;
|
2012-02-08 08:16:41 -05:00
|
|
|
}
|
2011-11-22 11:08:36 -05:00
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-22 11:08:36 -05:00
|
|
|
if (object instanceof Movie) {
|
|
|
|
object = ((Movie) object).getName();
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-22 11:08:36 -05:00
|
|
|
return normalizeObject(object);
|
|
|
|
}
|
|
|
|
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-22 13:26:50 -05:00
|
|
|
// Match by SxE and airdate (cascade of the SeasonEpisode and AirDate metrics)
EpisodeIdentifier(
    new MetricCascade(SeasonEpisode, AirDate)
),

// Advanced episode <-> file matching Lv1 (cascade of the SeasonEpisode, AirDate and Title metrics)
EpisodeFunnel(
    new MetricCascade(SeasonEpisode, AirDate, Title)
),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-02-21 02:42:29 -05:00
|
|
|
// Advanced episode <-> file matching Lv2
EpisodeBalancer(new SimilarityMetric() {

    /**
     * Combine the EpisodeIdentifier and Title similarities into a single value.
     *
     * The result is (max(sxe, 0) * title) + (floor(sxe) / 10), i.e. a title match only
     * counts together with a non-negative identifier similarity.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        float identifierScore = EpisodeIdentifier.getSimilarity(o1, o2);
        float titleScore = Title.getSimilarity(o1, o2);

        // account for misleading SxE patterns in the episode title
        if (identifierScore < 0 && titleScore == 1) {
            if (EpisodeIdentifier.getSimilarity(getTitle(o1), getTitle(o2)) == 1) {
                identifierScore = 1;
                titleScore = 0;
            }
        }

        // allow title to override SxE only if series name also is a good match
        if (titleScore == 1) {
            if (SeriesName.getSimilarity(o1, o2) < 0.5f) {
                titleScore = 0;
            }
        }

        // 1:SxE && Title, 2:SxE
        float weightedTitle = max(identifierScore, 0) * titleScore;
        double identifierRank = floor(identifierScore) / 10;
        return (float) (weightedTitle + identifierRank);
    }

    /**
     * @return "series name" + " " + "episode title" for episodes, the object itself otherwise
     */
    public Object getTitle(Object o) {
        if (!(o instanceof Episode)) {
            return o;
        }

        Episode episode = (Episode) o;
        return String.format("%s %s", episode.getSeriesName(), episode.getTitle());
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-10-28 02:28:19 -04:00
|
|
|
// Match series title and episode title against folder structure and file name
SubstringFields(new SubstringMetric() {

    /**
     * Compare every field of the first object with every field of the second object,
     * average the pairwise similarities and round the average up to the next third.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        String[] f1 = normalize(fields(o1));
        String[] f2 = normalize(fields(o2));

        // match all fields and average similarity
        float sum = 0;
        for (String s1 : f1) {
            for (String s2 : f2) {
                sum += super.getSimilarity(s1, s2);
            }
        }
        sum /= f1.length * f2.length;

        // normalize into 3 similarity levels
        return (float) (ceil(sum * 3) / 3);
    }

    /**
     * Normalize each object via normalizeObject (null becomes the empty string) and strip all whitespace.
     */
    protected String[] normalize(Object[] objects) {
        String[] names = new String[objects.length];
        for (int i = 0; i < objects.length; i++) {
            names[i] = normalizeObject(objects[i]).replaceAll("\\s", "");
        }
        return names;
    }

    /**
     * Collect the values that represent the given object.
     *
     * @return for episodes always 4 fields (distinct names in insertion order: series name, title,
     *         effective series names; padded with null), for files the absolute path of the parent
     *         folder (null if the file has no parent) and the file itself, for movies name and year,
     *         otherwise just the object itself
     */
    protected Object[] fields(Object object) {
        if (object instanceof Episode) {
            Episode episode = (Episode) object;

            // the set drops duplicate names while keeping insertion order
            LinkedHashSet<String> set = new LinkedHashSet<String>(4);
            set.add(removeTrailingBrackets(episode.getSeriesName()));
            set.add(removeTrailingBrackets(episode.getTitle()));
            for (String it : episode.getSeries().getEffectiveNames()) {
                set.add(removeTrailingBrackets(it));
            }

            // use the first 4 names at most and pad with null if there are less
            Iterator<String> itr = set.iterator();
            Object[] f = new Object[4];
            for (int i = 0; i < f.length; i++) {
                f[i] = itr.hasNext() ? itr.next() : null;
            }
            return f;
        }

        if (object instanceof File) {
            File file = (File) object;

            // File.getParentFile() is null if the path has no parent component; keep the
            // number of fields stable and use a null field in that case instead of failing
            File parent = file.getParentFile();
            return new Object[] { parent == null ? null : parent.getAbsolutePath(), file };
        }

        if (object instanceof Movie) {
            Movie movie = (Movie) object;
            return new Object[] { movie.getName(), movie.getYear() };
        }

        return new Object[] { object };
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-01-03 04:23:03 -05:00
|
|
|
// Match via common word sequence in episode name and file name
NameSubstringSequence(new SequenceMatchSimilarity() {

    /**
     * Round the similarity of the parent implementation down to the next quarter.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        // normalize absolute similarity to similarity rank (quarter steps),
        // so we are less likely to fall for false positives in this pass, and move on to the next one
        float scaled = super.getSimilarity(o1, o2) * 4;
        return (float) (floor(scaled) / 4);
    }

    /**
     * Select the name to compare: series name without trailing brackets for episodes,
     * movie name for movies, and the extension-less tail of the path (as produced by
     * getRelativePathTail with 3) for files. The selected value is then normalized.
     */
    @Override
    protected String normalize(Object object) {
        Object name = object;

        if (object instanceof Episode) {
            name = removeTrailingBrackets(((Episode) object).getSeriesName());
        } else if (object instanceof Movie) {
            name = ((Movie) object).getName();
        } else if (object instanceof File) {
            File tail = getRelativePathTail((File) object, 3);
            name = getNameWithoutExtension(tail.getPath());
        }

        // simplify file name, if possible
        return normalizeObject(name);
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-02-15 04:50:23 -05:00
|
|
|
// Match by generic name similarity (round rank)
Name(new NameSimilarityMetric() {

    /**
     * Round the similarity of the parent implementation down to the next quarter.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        // normalize absolute similarity to similarity rank (quarter steps),
        // so we are less likely to fall for false positives in this pass, and move on to the next one
        float scaled = super.getSimilarity(o1, o2) * 4;
        return (float) (floor(scaled) / 4);
    }

    /**
     * Names are compared in their normalized form (see normalizeObject).
     */
    @Override
    protected String normalize(Object object) {
        // simplify file name, if possible
        String normalized = normalizeObject(object);
        return normalized;
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-02-15 04:50:23 -05:00
|
|
|
// Match by series name similarity (best of strict and non-strict normalization, rounded down to quarters)
SeriesName(new NameSimilarityMetric() {

    private final ReleaseInfo releaseInfo = new ReleaseInfo();
    private final SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher();

    /**
     * Compare both objects once with strict and once with non-strict normalization,
     * take the better of the two similarities and round it down to the next quarter.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        float strictScore = super.getSimilarity(normalize(o1, true), normalize(o2, true));
        float lenientScore = super.getSimilarity(normalize(o1, false), normalize(o2, false));

        float best = max(strictScore, lenientScore);
        return (float) (floor(best * 4) / 4);
    }

    /**
     * The values passed to the parent implementation are already normalized strings, so they are used as is.
     */
    @Override
    protected String normalize(Object object) {
        return object.toString();
    }

    /**
     * Reduce the given object to a normalized series name.
     *
     * @param object the object to normalize
     * @param strict passed on to ReleaseInfo.cleanRelease; for episodes it also decides
     *               whether trailing brackets are kept (strict) or removed (non-strict)
     */
    protected String normalize(Object object, boolean strict) {
        Object name = object;

        if (object instanceof Episode) {
            // focus on series name (non-strict: without US/UK 1967/2005 differentiation)
            String seriesName = ((Episode) object).getSeriesName();
            name = strict ? seriesName : removeTrailingBrackets(seriesName);
        } else if (object instanceof File) {
            // try to narrow down on series name
            String fileName = ((File) object).getName();
            String match = seriesNameMatcher.matchByEpisodeIdentifier(fileName);
            name = (match != null) ? match : fileName;
        }

        // equally strip away any potential clutter
        try {
            name = releaseInfo.cleanRelease(singleton(name.toString()), strict).iterator().next();
        } catch (NoSuchElementException e) {
            // keep default value in case all tokens are stripped away
        } catch (IOException e) {
            Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
        }

        // simplify file name, if possible
        return normalizeObject(name);
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-04-02 11:34:25 -04:00
|
|
|
// Match by generic name similarity (absolute)
|
|
|
|
AbsolutePath(new NameSimilarityMetric() {
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-04-02 11:34:25 -04:00
|
|
|
@Override
|
|
|
|
protected String normalize(Object object) {
|
|
|
|
if (object instanceof File) {
|
|
|
|
object = normalizePathSeparators(getRelativePathTail((File) object, 3).getPath());
|
|
|
|
}
|
|
|
|
return normalizeObject(object.toString()); // simplify file name, if possible
|
|
|
|
}
|
|
|
|
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-02-15 04:50:23 -05:00
|
|
|
// Match by common sequence of numbers
NumericSequence(new SequenceMatchSimilarity() {

    /**
     * Compare the number sequences of both objects, once using numbers only and once
     * including the series / movie name, and return the better of the two similarities.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        float lowerBound = super.getSimilarity(normalize(o1, true), normalize(o2, true));
        float upperBound = super.getSimilarity(normalize(o1, false), normalize(o2, false));

        return max(lowerBound, upperBound);
    }

    /**
     * The values passed to the parent implementation are already normalized strings, so they are used as is.
     */
    @Override
    protected String normalize(Object object) {
        return object.toString();
    }

    /**
     * Extract all numbers of the given object as space-separated sequence.
     *
     * @param object      the object to extract numbers from
     * @param numbersOnly if true only SxE (episodes) or year (movies) are considered,
     *                    otherwise the series / movie name is included as well
     * @return the numbers found in the normalized value, joined by a single space
     */
    protected String normalize(Object object, boolean numbersOnly) {
        Object subject = object;

        if (object instanceof Episode) {
            Episode e = (Episode) object;
            if (numbersOnly) {
                subject = EpisodeFormat.SeasonEpisode.formatSxE(e);
            } else {
                subject = String.format("%s %s", e.getSeriesName(), EpisodeFormat.SeasonEpisode.formatSxE(e));
            }
        } else if (object instanceof Movie) {
            Movie m = (Movie) object;
            if (numbersOnly) {
                subject = m.getYear();
            } else {
                subject = String.format("%s %s", m.getName(), m.getYear());
            }
        }

        // simplify file name if possible and extract numbers
        List<Integer> numbers = new ArrayList<Integer>(4);
        Scanner scanner = new Scanner(normalizeObject(subject)).useDelimiter("\\D+");
        try {
            // iterate over all tokens (i.e. digit sequences) rather than stopping at the first token that
            // is not an int, so that a single oversized number does not hide all the numbers that follow it
            while (scanner.hasNext()) {
                if (scanner.hasNextInt()) {
                    numbers.add(scanner.nextInt());
                } else {
                    scanner.next(); // skip digit sequences that do not fit into an int
                }
            }
        } finally {
            scanner.close();
        }

        return join(numbers, " ");
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
// Match by generic numeric similarity
Numeric(new NumericSimilarityMetric() {

    /**
     * Compare every non-null field of the first object with every non-null field of the
     * second object and return the highest similarity found (0 if nothing could be compared).
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        String[] left = fields(o1);
        String[] right = fields(o2);

        // match all fields and keep the best similarity
        float best = 0;
        for (String a : left) {
            if (a == null) {
                continue;
            }
            for (String b : right) {
                if (b == null) {
                    continue;
                }
                best = max(super.getSimilarity(a, b), best);
            }
        }
        return best;
    }

    /**
     * Collect the strings that represent the given object.
     *
     * @return for episodes: series name, formatted SxE, absolute number (or null) and season/episode
     *         as 2+2 digits (or null); for movies: name and year; otherwise the normalized object
     */
    protected String[] fields(Object object) {
        if (object instanceof Episode) {
            Episode episode = (Episode) object;

            String absolute = null;
            if (episode.getAbsolute() != null) {
                absolute = episode.getAbsolute().toString();
            }

            String digits = null;
            if (episode.getSeason() != null && episode.getEpisode() != null) {
                digits = String.format("%02d%02d", episode.getSeason(), episode.getEpisode());
            }

            return new String[] { episode.getSeriesName(), EpisodeFormat.SeasonEpisode.formatSxE(episode), absolute, digits };
        }

        if (object instanceof Movie) {
            Movie movie = (Movie) object;
            String year = String.valueOf(movie.getYear());
            return new String[] { movie.getName(), year };
        }

        return new String[] { normalizeObject(object) };
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-22 11:08:36 -05:00
|
|
|
// Match by file length (only works when matching torrents or files)
FileSize(new FileSizeMetric() {

    /**
     * Delegate to the parent implementation, with swapped arguments if the first one is a File.
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        // order of arguments is logically irrelevant, but we might be able to save us a call to File.length() which is quite costly
        if (o1 instanceof File) {
            return super.getSimilarity(o2, o1);
        }
        return super.getSimilarity(o1, o2);
    }

    /**
     * @return the length reported by the FileInfo, or whatever the parent implementation yields for other objects
     */
    @Override
    protected long getLength(Object object) {
        if (!(object instanceof FileInfo)) {
            return super.getLength(object);
        }

        FileInfo info = (FileInfo) object;
        return info.getLength();
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-24 12:27:39 -05:00
|
|
|
// Match by common words at the beginning of both files
|
|
|
|
FileName(new FileNameMetric() {
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-24 12:27:39 -05:00
|
|
|
@Override
|
|
|
|
protected String getFileName(Object object) {
|
|
|
|
if (object instanceof File || object instanceof FileInfo) {
|
|
|
|
return normalizeObject(object);
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-24 12:27:39 -05:00
|
|
|
return null;
|
|
|
|
}
|
2012-10-09 09:30:32 -04:00
|
|
|
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-10-09 09:30:32 -04:00
|
|
|
// Match by file last modified and episode release dates
TimeStamp(new TimeStampMetric() {

    /**
     * Reduce the similarity of the parent implementation to three levels:
     * 1 (at least 0.8), 0 (at least 0) and -1 (anything else).
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        // adjust differentiation accuracy to about a year
        float similarity = super.getSimilarity(o1, o2);

        if (similarity >= 0.8) {
            return 1;
        }
        if (similarity >= 0) {
            return 0;
        }
        return -1;
    }

    /**
     * @return the airdate timestamp for episodes, or -1 if the airdate lies in the future
     *         or cannot be retrieved; for other objects the result of the parent implementation
     */
    @Override
    public long getTimeStamp(Object object) {
        if (!(object instanceof Episode)) {
            return super.getTimeStamp(object);
        }

        long airdate;
        try {
            airdate = ((Episode) object).getAirdate().getTimeStamp();
        } catch (RuntimeException e) {
            return -1; // some episodes may not have airdate defined
        }

        // big penalty for episodes not yet aired
        return airdate > System.currentTimeMillis() ? -1 : airdate;
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-04-06 13:49:27 -04:00
|
|
|
// Match by series rating
SeriesRating(new SimilarityMetric() {

    // series info by series name; may hold null values for series that could not be resolved
    private final Map<String, SeriesInfo> seriesInfoCache = new HashMap<String, SeriesInfo>();

    /**
     * @return 1 if the higher of both ratings is at least 0.4, otherwise -1 if the lower one is negative, otherwise 0
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        float r1 = getRating(o1);
        float r2 = getRating(o2);

        if (max(r1, r2) >= 0.4) {
            return 1;
        }
        if (min(r1, r2) < 0) {
            return -1;
        }
        return 0;
    }

    /**
     * Look up the rating of the series the given episode belongs to.
     *
     * @return 0 for non-episodes, for unknown series, on any failure (which is logged) and for
     *         series with less than 15 ratings; -1 for series without any ratings; otherwise
     *         the rating (but never less than 0)
     */
    public float getRating(Object o) {
        if (!(o instanceof Episode)) {
            return 0;
        }

        Episode episode = (Episode) o;
        try {
            synchronized (seriesInfoCache) {
                String seriesName = episode.getSeriesName();

                SeriesInfo info = seriesInfoCache.get(seriesName);
                if (info == null && !seriesInfoCache.containsKey(seriesName)) {
                    info = lookupSeriesInfo(episode);
                    seriesInfoCache.put(seriesName, info);
                }

                if (info == null) {
                    return 0;
                }
                if (info.getRatingCount() <= 0) {
                    return -1; // BIG PENALTY FOR SHOWS WITH 0 RATINGS
                }

                float rating = max(0, info.getRating().floatValue());
                return info.getRatingCount() >= 15 ? rating : 0; // PENALIZE SHOWS WITH FEW RATINGS
            }
        } catch (Exception e) {
            Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
        }

        return 0;
    }

    // query TheTVDB by search result first and fall back to the local index (by series name) if that fails for any reason
    private SeriesInfo lookupSeriesInfo(Episode episode) throws Exception {
        try {
            return WebServices.TheTVDB.getSeriesInfo((TheTVDBSearchResult) episode.getSeries(), Locale.ENGLISH);
        } catch (Exception e) {
            return WebServices.TheTVDB.getSeriesInfoByLocalIndex(episode.getSeriesName(), Locale.ENGLISH);
        }
    }
}),
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2013-10-29 14:34:39 -04:00
|
|
|
// Match by (region) or (year) hints
RegionHint(new SimilarityMetric() {

    // trailing (letters) or (digits) group at the end of a name
    private final Pattern hint = compile("[(](\\p{Alpha}+|\\p{Digit}+)[)]$");

    private final SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher();
    private final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");

    /**
     * @return 1 if both objects have hints and one set of hints contains the other, 0 otherwise
     */
    @Override
    public float getSimilarity(Object o1, Object o2) {
        Set<String> first = getHint(o1);
        Set<String> second = getHint(o2);

        if (first.isEmpty() || second.isEmpty()) {
            return 0;
        }

        boolean subset = first.containsAll(second) || second.containsAll(first);
        return subset ? 1 : 0;
    }

    /**
     * Collect the lower-case hints of the given object.
     *
     * @return for episodes the trailing bracket group of the series name (if any), for files all
     *         tokens of the path elements returned by listPathTail, otherwise an empty set
     */
    public Set<String> getHint(Object o) {
        if (o instanceof File) {
            Set<String> tokens = new HashSet<String>();
            for (File element : listPathTail((File) o, 3, true)) {
                // try to focus on series name
                String name = element.getName();
                String seriesName = seriesNameMatcher.matchByEpisodeIdentifier(name);
                String text = (seriesName != null) ? seriesName : name;

                // tokenize
                for (String token : punctuation.split(text)) {
                    if (token.length() == 0) {
                        continue;
                    }
                    tokens.add(token.trim().toLowerCase());
                }
            }
            return tokens;
        }

        if (o instanceof Episode) {
            Matcher matcher = hint.matcher(((Episode) o).getSeriesName());
            if (matcher.find()) {
                return singleton(matcher.group(1).trim().toLowerCase());
            }
        }

        return emptySet();
    }
}),
|
|
|
|
|
2012-10-24 11:20:47 -04:00
|
|
|
// Match by stored MetaAttributes if possible
|
|
|
|
MetaAttributes(new CrossPropertyMetric() {
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-10-24 11:20:47 -04:00
|
|
|
@Override
|
|
|
|
protected Map<String, Object> getProperties(Object object) {
|
|
|
|
// Episode / Movie objects
|
|
|
|
if (object instanceof Episode || object instanceof Movie) {
|
|
|
|
return super.getProperties(object);
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
|
|
|
// deserialize MetaAttributes if enabled and available
|
2012-10-25 12:19:53 -04:00
|
|
|
if (object instanceof File && useExtendedFileAttributes()) {
|
2012-10-24 11:20:47 -04:00
|
|
|
try {
|
2013-04-19 05:28:55 -04:00
|
|
|
return super.getProperties(new net.sourceforge.filebot.media.MetaAttributes((File) object).getObject());
|
2012-10-24 11:20:47 -04:00
|
|
|
} catch (Throwable e) {
|
|
|
|
// ignore
|
|
|
|
}
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-10-24 11:20:47 -04:00
|
|
|
// ignore everything else
|
|
|
|
return emptyMap();
|
|
|
|
};
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
});
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
// the metric that this constant delegates to
private final SimilarityMetric metric;

// enum constructors are implicitly private
EpisodeMetrics(SimilarityMetric metric) {
    this.metric = metric;
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
@Override
|
|
|
|
public float getSimilarity(Object o1, Object o2) {
|
|
|
|
return metric.getSimilarity(o1, o2);
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-07-24 16:01:48 -04:00
|
|
|
private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
|
2012-12-02 08:41:06 -05:00
|
|
|
private static final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-08 13:26:54 -05:00
|
|
|
protected static String normalizeObject(Object object) {
|
2012-10-25 15:28:30 -04:00
|
|
|
if (object == null) {
|
|
|
|
return "";
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-01-03 04:23:03 -05:00
|
|
|
String result = transformCache.get(object);
|
|
|
|
if (result != null) {
|
|
|
|
return result;
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
String name = object.toString();
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
// use name without extension
|
|
|
|
if (object instanceof File) {
|
2011-10-28 02:28:19 -04:00
|
|
|
name = getName((File) object);
|
2011-11-24 12:27:39 -05:00
|
|
|
} else if (object instanceof FileInfo) {
|
|
|
|
name = ((FileInfo) object).getName();
|
2009-07-26 12:54:24 -04:00
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-21 09:51:19 -05:00
|
|
|
// remove checksums, any [...] or (...)
|
|
|
|
name = removeEmbeddedChecksum(name);
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-12-02 08:41:06 -05:00
|
|
|
synchronized (transliterator) {
|
|
|
|
name = transliterator.transform(name);
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-08 13:26:54 -05:00
|
|
|
// remove/normalize special characters
|
2012-01-01 22:34:13 -05:00
|
|
|
name = normalizePunctuation(name);
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-01-03 04:23:03 -05:00
|
|
|
// normalize to lower case
|
2012-07-16 06:09:21 -04:00
|
|
|
name = name.toLowerCase();
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2012-01-03 04:23:03 -05:00
|
|
|
transformCache.put(object, name);
|
|
|
|
return name;
|
2009-07-26 12:54:24 -04:00
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-13 13:29:25 -05:00
|
|
|
public static SimilarityMetric[] defaultSequence(boolean includeFileMetrics) {
|
2011-12-25 10:47:19 -05:00
|
|
|
// 1 pass: divide by file length (only works for matching torrent entries or files)
|
|
|
|
// 2-3 pass: divide by title or season / episode numbers
|
|
|
|
// 4 pass: divide by folder / file name and show name / episode title
|
|
|
|
// 5 pass: divide by name (rounded into n levels)
|
|
|
|
// 6 pass: divide by generic numeric similarity
|
2012-10-09 09:30:32 -04:00
|
|
|
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
|
|
|
// 8 pass: resolve remaining collisions via absolute string similarity
|
2011-11-13 13:29:25 -05:00
|
|
|
if (includeFileMetrics) {
|
2013-12-14 05:49:16 -05:00
|
|
|
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, MetaAttributes, SubstringFields, new MetricCascade(NameSubstringSequence, Name), Numeric, NumericSequence, SeriesName, RegionHint, SeriesRating, TimeStamp, AbsolutePath };
|
2011-11-13 13:29:25 -05:00
|
|
|
} else {
|
2013-12-14 05:49:16 -05:00
|
|
|
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, MetaAttributes, SubstringFields, new MetricCascade(NameSubstringSequence, Name), Numeric, NumericSequence, SeriesName, RegionHint, SeriesRating, TimeStamp, AbsolutePath };
|
2011-11-13 13:29:25 -05:00
|
|
|
}
|
2009-07-26 12:54:24 -04:00
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2011-11-27 09:35:53 -05:00
|
|
|
public static SimilarityMetric verificationMetric() {
|
|
|
|
return new MetricCascade(FileSize, FileName, SeasonEpisode, AirDate, Title, Name);
|
|
|
|
}
|
2013-09-06 03:55:13 -04:00
|
|
|
|
2009-07-26 12:54:24 -04:00
|
|
|
}
|