mirror of
https://github.com/mitb-archive/filebot
synced 2024-12-23 08:18:52 -05:00
* fine-tune generic numeric sequence matching (e.g. Bones Staffel 1 Folge 5)
This commit is contained in:
parent
1e06994a59
commit
24f9b8d92a
@ -8,11 +8,15 @@ import static java.util.Collections.*;
|
||||
import static net.sourceforge.filebot.Settings.*;
|
||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
import static net.sourceforge.tuned.StringUtilities.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
|
||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||
import net.sourceforge.filebot.vfs.FileInfo;
|
||||
@ -187,7 +191,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
}),
|
||||
|
||||
// Match via common word sequence in episode name and file name
|
||||
SubstringSequence(new SequenceMatchSimilarity() {
|
||||
NameSubstringSequence(new SequenceMatchSimilarity() {
|
||||
|
||||
@Override
|
||||
public float getSimilarity(Object o1, Object o2) {
|
||||
@ -199,12 +203,19 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
|
||||
@Override
|
||||
protected String normalize(Object object) {
|
||||
if (object instanceof Episode) {
|
||||
object = removeTrailingBrackets(((Episode) object).getSeriesName());
|
||||
} else if (object instanceof Movie) {
|
||||
object = ((Movie) object).getName();
|
||||
} else if (object instanceof File) {
|
||||
object = getNameWithoutExtension(getRelativePathTail((File) object, 3).getPath());
|
||||
}
|
||||
// simplify file name, if possible
|
||||
return normalizeObject(object);
|
||||
}
|
||||
}),
|
||||
|
||||
// Match by generic name similarity
|
||||
// Match by generic name similarity (round rank)
|
||||
Name(new NameSimilarityMetric() {
|
||||
|
||||
@Override
|
||||
@ -222,12 +233,33 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
}
|
||||
}),
|
||||
|
||||
NumericSequence(new SequenceMatchSimilarity() {
|
||||
// Match by generic name similarity (absolute)
|
||||
AbsoluteName(new NameSimilarityMetric() {
|
||||
|
||||
@Override
|
||||
protected String normalize(Object object) {
|
||||
// simplify file name, if possible
|
||||
return normalizeObject(object).replaceAll("\\D+", " ").trim();
|
||||
return normalizeObject(object);
|
||||
}
|
||||
}),
|
||||
|
||||
// Match via the sequence of integers extracted from each object:
// an Episode is reduced to its formatted SxE string, a Movie to its year,
// and anything else is passed to normalizeObject unchanged.
NumericSequence(new SequenceMatchSimilarity() {
|
||||
|
||||
@Override
|
||||
protected String normalize(Object object) {
|
||||
if (object instanceof Episode) {
|
||||
object = EpisodeFormat.SeasonEpisode.formatSxE((Episode) object);
|
||||
} else if (object instanceof Movie) {
|
||||
object = ((Movie) object).getYear();
|
||||
}
|
||||
|
||||
// simplify file name if possible and extract numbers
|
||||
List<Integer> numbers = new ArrayList<Integer>(4);
|
||||
// every run of non-digit characters acts as a delimiter, so tokens are digit runs only
Scanner scanner = new Scanner(normalizeObject(object)).useDelimiter("\\D+");
|
||||
// NOTE(review): hasNextInt() is false for a digit run that does not fit into an int,
// so extraction stops at the first such token — confirm this is intended
while (scanner.hasNextInt()) {
|
||||
numbers.add(scanner.nextInt());
|
||||
}
|
||||
// the normalized form is the extracted numbers separated by single spaces
return join(numbers, " ");
|
||||
}
|
||||
}),
|
||||
|
||||
@ -411,9 +443,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
||||
// 8 pass: resolve remaining collisions via absolute string similarity
|
||||
if (includeFileMetrics) {
|
||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(NameSubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, AbsoluteName };
|
||||
} else {
|
||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(NameSubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, AbsoluteName };
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -148,13 +148,14 @@ public class Matcher<V, C> {
|
||||
for (Match<V, C> possibleMatch : possibleMatches) {
|
||||
float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate());
|
||||
|
||||
Set<Match<V, C>> matchSet = similarityMap.get(similarity);
|
||||
// DEBUG
|
||||
// System.out.format("%s: %.04f: %s%n", metric, similarity, possibleMatch);
|
||||
|
||||
Set<Match<V, C>> matchSet = similarityMap.get(similarity);
|
||||
if (matchSet == null) {
|
||||
matchSet = new LinkedHashSet<Match<V, C>>();
|
||||
similarityMap.put(similarity, matchSet);
|
||||
}
|
||||
|
||||
matchSet.add(possibleMatch);
|
||||
|
||||
// unwind this thread if we have been interrupted
|
||||
|
@ -24,6 +24,11 @@ public class SequenceMatchSimilarity implements SimilarityMetric {
|
||||
if (match == null)
|
||||
return 0;
|
||||
|
||||
return similarity(match, s1, s2);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Scores a matched sequence as the length of the match divided by the length
 * of the shorter of the two compared strings.
 *
 * @param match the matched sequence (the visible caller returns 0 before this call when it is null)
 * @param s1 first compared string
 * @param s2 second compared string
 * @return match length relative to the shorter input length
 */
protected float similarity(String match, String s1, String s2) {
|
||||
// cast first so the division is floating-point rather than integer division
// NOTE(review): if the shorter string is empty the divisor is 0 and the result is NaN or Infinity — confirm callers never pass empty strings
return (float) match.length() / min(s1.length(), s2.length());
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,7 @@ public final class SubtitleUtilities {
|
||||
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
||||
|
||||
// optimize for generic media <-> subtitle matching
|
||||
SimilarityMetric[] metrics = new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringSequence, new MetricCascade(SubstringSequence, Name), Numeric, new NameSimilarityMetric() };
|
||||
SimilarityMetric[] metrics = new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, new NameSimilarityMetric() };
|
||||
|
||||
// subtitle verification metric specifically excluding SxE mismatches
|
||||
SimilarityMetric absoluteSeasonEpisode = new SimilarityMetric() {
|
||||
@ -61,7 +61,7 @@ public final class SubtitleUtilities {
|
||||
return f < 1 ? -1 : 1;
|
||||
}
|
||||
};
|
||||
SimilarityMetric sanity = new MetricCascade(absoluteSeasonEpisode, AirDate, new MetricAvg(SubstringSequence, Name));
|
||||
SimilarityMetric sanity = new MetricCascade(absoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name));
|
||||
|
||||
// first match everything as best as possible, then filter possibly bad matches
|
||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
||||
|
@ -352,9 +352,15 @@ public final class FileUtilities {
|
||||
|
||||
|
||||
/**
 * Lists the path elements of the given file by delegating to
 * listPathTail with Integer.MAX_VALUE as the tail size, i.e. with no practical
 * limit on the number of elements.
 *
 * @param file the file whose path elements are listed
 * @return the list produced by listPathTail
 */
public static List<File> listPath(File file) {
|
||||
return listPathTail(file, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
|
||||
public static List<File> listPathTail(File file, int tailSize) {
|
||||
LinkedList<File> nodes = new LinkedList<File>();
|
||||
|
||||
for (File node = file; node != null && !UNC_PREFIX.equals(node.toString()); node = node.getParentFile()) {
|
||||
File node = file;
|
||||
for (int i = 0; node != null && i < tailSize && !UNC_PREFIX.equals(node.toString()); i++, node = node.getParentFile()) {
|
||||
nodes.addFirst(node);
|
||||
}
|
||||
|
||||
@ -362,6 +368,17 @@ public final class FileUtilities {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Builds a relative File from the names of the trailing path elements of the given file.
 *
 * @param file the file whose trailing path elements are used
 * @param tailSize tail size passed on to listPathTail
 * @return a File chained from the element names, or null if no element was appended
 */
public static File getRelativePathTail(File file, int tailSize) {
|
||||
// stays null until the first element name is appended
File f = null;
|
||||
for (File it : listPathTail(file, tailSize)) {
|
||||
// elements without a parent are skipped (presumably a file system root — confirm)
if (it.getParentFile() != null) {
|
||||
// append this element's name as a child of the path built so far
f = new File(f, it.getName());
|
||||
}
|
||||
}
|
||||
// NOTE(review): null is returned when every element was skipped, e.g. a single-name relative path; callers that dereference the result should be checked
return f;
|
||||
}
|
||||
|
||||
|
||||
public static List<File> listFiles(Iterable<File> folders, int maxDepth, boolean listHiddenFiles) {
|
||||
List<File> files = new ArrayList<File>();
|
||||
|
||||
|
@ -123,6 +123,7 @@ Hard.Subbed
|
||||
HBO
|
||||
hd
|
||||
HDRip
|
||||
Hi10P
|
||||
Hindi
|
||||
History.Channel
|
||||
HQ
|
||||
|
@ -1,2 +1,3 @@
|
||||
HIMYM How I Met your Mother
|
||||
Hml8p Homeland
|
||||
Hml8p Homeland
|
||||
NCIS.LA NCIS: Los Angeles
|
@ -3,8 +3,8 @@ def input = []
|
||||
def failOnError = _args.conflict == 'fail'
|
||||
|
||||
// print input parameters
|
||||
_args.bindings?.each{ _log.finest("Parameter: $it.key = $it.value") }
|
||||
args.each{ _log.finest("Argument: $it") }
|
||||
_args.bindings?.each{ _log.fine("Parameter: $it.key = $it.value") }
|
||||
args.each{ _log.fine("Argument: $it") }
|
||||
args.findAll{ !it.exists() }.each{ throw new Exception("File not found: $it") }
|
||||
|
||||
// check user-defined pre-condition
|
||||
@ -34,7 +34,7 @@ def format = [
|
||||
tvs: tryQuietly{ seriesFormat } ?: '''TV Shows/{n}/{episode.special ? "Special" : "Season "+s}/{n} - {episode.special ? "S00E"+special.pad(2) : s00e00} - {t}{".$lang"}''',
|
||||
anime: tryQuietly{ animeFormat } ?: '''Anime/{n}/{n} - {sxe} - {t}''',
|
||||
mov: tryQuietly{ movieFormat } ?: '''Movies/{n} ({y})/{n} ({y}){" CD$pi"}{".$lang"}''',
|
||||
music: tryQuietly{ musicFormat } ?: '''Music/{n}/{album}/{n} - {t}'''
|
||||
music: tryQuietly{ musicFormat } ?: '''Music/{n}/{album+'/'}{pi.pad(2)+'. '}{artist} - {t}'''
|
||||
]
|
||||
|
||||
|
||||
@ -117,7 +117,7 @@ def groups = input.groupBy{ f ->
|
||||
|
||||
def tvs = detectSeriesName(f)
|
||||
def mov = detectMovie(f, false)
|
||||
println "$f.name [series: $tvs, movie: $mov]"
|
||||
_log.fine("$f.name [series: $tvs, movie: $mov]")
|
||||
|
||||
// DECIDE EPISODE VS MOVIE (IF NOT CLEAR)
|
||||
if (tvs && mov) {
|
||||
@ -129,10 +129,10 @@ def groups = input.groupBy{ f ->
|
||||
|
||||
// S00E00 | 2012.07.21 | One Piece 217 | Firefly - Serenity | [Taken 1, Taken 2, Taken 3, Taken 4, ..., Taken 10]
|
||||
if (parseEpisodeNumber(fn, true) || parseDate(fn) || (fn =~ sn && parseEpisodeNumber(fn.after(sn), false)) || fn.after(sn) =~ / - .+/ || f.dir.listFiles{ it.isVideo() && norm(it.name) =~ sn && it.name =~ /\b\d{1,3}\b/}.size() >= 10) {
|
||||
println "Exclude Movie: $mov"
|
||||
_log.fine("Exclude Movie: $mov")
|
||||
mov = null
|
||||
} else if ((detectMovie(f, true) && [dn, fn].find{ it =~ /(19|20)\d{2}/ }) || [dn, fn].find{ it =~ mn && !(it.after(mn) =~ /\b\d{1,3}\b/) }) {
|
||||
println "Exclude Series: $tvs"
|
||||
_log.fine("Exclude Series: $tvs")
|
||||
tvs = null
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user