1
0
mirror of https://github.com/mitb-archive/filebot synced 2025-01-11 05:48:01 -05:00

* fine-tune subtitle matching

@see https://www.filebot.net/forums/viewtopic.php?f=8&t=2869
This commit is contained in:
Reinhard Pointner 2015-07-25 22:46:28 +00:00
parent e6eef706e4
commit bc7cf8cba0
3 changed files with 66 additions and 25 deletions

View File

@ -660,7 +660,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4)); private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
private static final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove"); private static final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
protected static String normalizeObject(Object object) { public static String normalizeObject(Object object) {
if (object == null) { if (object == null) {
return ""; return "";
} }

View File

@ -1,62 +1,53 @@
package net.filebot.similarity; package net.filebot.similarity;
import static java.lang.Math.*; import static java.lang.Math.*;
import static net.filebot.similarity.CommonSequenceMatcher.*; import static net.filebot.similarity.CommonSequenceMatcher.*;
import static net.filebot.similarity.Normalization.*; import static net.filebot.similarity.Normalization.*;
import java.util.Locale; import java.util.Locale;
public class SequenceMatchSimilarity implements SimilarityMetric { public class SequenceMatchSimilarity implements SimilarityMetric {
private final CommonSequenceMatcher commonSequenceMatcher; private final CommonSequenceMatcher commonSequenceMatcher;
public SequenceMatchSimilarity() { public SequenceMatchSimilarity() {
this(10, false); this(10, false);
} }
public SequenceMatchSimilarity(int commonSequenceMaxStartIndex, boolean returnFirstMatch) { public SequenceMatchSimilarity(int commonSequenceMaxStartIndex, boolean returnFirstMatch) {
this.commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(Locale.ROOT), commonSequenceMaxStartIndex, returnFirstMatch); this.commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(Locale.ROOT), commonSequenceMaxStartIndex, returnFirstMatch);
} }
@Override @Override
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
String s1 = normalize(o1); String s1 = normalize(o1);
String s2 = normalize(o2); String s2 = normalize(o2);
// match common word sequence // match common word sequence
String match = match(s1, s2); String match = match(s1, s2);
if (match == null) if (match == null || match.isEmpty())
return 0; return 0;
return similarity(match, s1, s2); return similarity(match, s1, s2);
} }
protected float similarity(String match, String s1, String s2) { protected float similarity(String match, String s1, String s2) {
return (float) match.length() / min(s1.length(), s2.length()); return (float) match.length() / min(s1.length(), s2.length());
} }
protected String normalize(Object object) { protected String normalize(Object object) {
// use string representation // use string representation
String name = object.toString(); String name = object.toString();
// normalize separators // normalize separators
name = normalizePunctuation(name); name = normalizePunctuation(name);
// normalize case and trim // normalize case and trim
return name.trim().toLowerCase(); return name.trim().toLowerCase();
} }
protected String match(String s1, String s2) { protected String match(String s1, String s2) {
return commonSequenceMatcher.matchFirstCommonSequence(s1, s2); return commonSequenceMatcher.matchFirstCommonSequence(s1, s2);
} }
} }

View File

@ -8,6 +8,7 @@ import static net.filebot.util.FileUtilities.*;
import java.io.File; import java.io.File;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.WeakHashMap; import java.util.WeakHashMap;
import java.util.logging.Level; import java.util.logging.Level;
@ -19,6 +20,7 @@ import net.filebot.media.MetaAttributes;
import net.filebot.mediainfo.MediaInfo; import net.filebot.mediainfo.MediaInfo;
import net.filebot.mediainfo.MediaInfo.StreamKind; import net.filebot.mediainfo.MediaInfo.StreamKind;
import net.filebot.similarity.CrossPropertyMetric; import net.filebot.similarity.CrossPropertyMetric;
import net.filebot.similarity.EpisodeMetrics;
import net.filebot.similarity.MetricAvg; import net.filebot.similarity.MetricAvg;
import net.filebot.similarity.MetricCascade; import net.filebot.similarity.MetricCascade;
import net.filebot.similarity.NameSimilarityMetric; import net.filebot.similarity.NameSimilarityMetric;
@ -68,6 +70,54 @@ public enum SubtitleMetrics implements SimilarityMetric {
} }
}), }),
NameSubstringSequenceExists(new SequenceMatchSimilarity() {

	/**
	 * Binary metric: compares every effective identifier of {@code o1} with every effective identifier of {@code o2} and returns 1 as soon as one pair is rated 1 by the inherited
	 * sequence matching, or 0 if no pair does (including when either object yields no identifiers).
	 */
	@Override
	public float getSimilarity(Object o1, Object o2) {
		String[] f1 = getNormalizedEffectiveIdentifiers(o1);
		String[] f2 = getNormalizedEffectiveIdentifiers(o2);

		for (String s1 : f1) {
			for (String s2 : f2) {
				// the inherited implementation runs normalize -> match -> similarity on the pair
				if (super.getSimilarity(s1, s2) >= 1) {
					return 1;
				}
			}
		}

		return 0;
	}

	/**
	 * Rates any non-empty common sequence as a full match, regardless of how long it is relative to the compared names.
	 *
	 * @param match
	 *            the common sequence found for {@code s1} and {@code s2}
	 * @return 1 if {@code match} is non-empty, 0 otherwise
	 */
	@Override
	protected float similarity(String match, String s1, String s2) {
		return match.length() > 0 ? 1 : 0;
	}

	/**
	 * Uses the plain string representation; the values passed in by {@link #getSimilarity(Object, Object)} have already been normalized by
	 * {@link #getNormalizedEffectiveIdentifiers(Object)}.
	 */
	@Override
	protected String normalize(Object object) {
		return object.toString();
	}

	/**
	 * Resolves the effective identifiers of the given object and normalizes each of them via {@code EpisodeMetrics.normalizeObject}.
	 *
	 * @return one normalized name per effective identifier, in the same order; empty if there are no identifiers
	 */
	protected String[] getNormalizedEffectiveIdentifiers(Object object) {
		List<?> identifiers = getEffectiveIdentifiers(object);
		String[] names = new String[identifiers.size()];

		for (int i = 0; i < names.length; i++) {
			names[i] = EpisodeMetrics.normalizeObject(identifiers.get(i));
		}

		return names;
	}

	/**
	 * Selects the values that identify the given object for name matching.
	 *
	 * @return the subtitle name for an {@code OpenSubtitlesSubtitleDescriptor}, the result of {@code listPathTail(file, 2, true)} for a {@code File} (presumably the last two path
	 *         elements -- confirm against FileUtilities), or an empty list for any other type
	 */
	protected List<?> getEffectiveIdentifiers(Object object) {
		if (object instanceof OpenSubtitlesSubtitleDescriptor) {
			return singletonList(((OpenSubtitlesSubtitleDescriptor) object).getName());
		} else if (object instanceof File) {
			return listPathTail((File) object, 2, true);
		}

		return emptyList();
	}

}),
OriginalFileName(new SequenceMatchSimilarity() { OriginalFileName(new SequenceMatchSimilarity() {
@Override @Override
@ -175,11 +225,11 @@ public enum SubtitleMetrics implements SimilarityMetric {
} }
public static SimilarityMetric[] defaultSequence() { public static SimilarityMetric[] defaultSequence() {
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, OriginalFileName, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, FileName, DiskNumber, VideoProperties, new NameSimilarityMetric() }; return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, OriginalFileName, NameSubstringSequenceExists, new MetricAvg(NameSubstringSequenceExists, Name), Numeric, FileName, DiskNumber, VideoProperties, new NameSimilarityMetric() };
} }
public static SimilarityMetric verificationMetric() { public static SimilarityMetric verificationMetric() {
return new MetricCascade(AbsoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name), getMovieMatchMetric(), OriginalFileName); return new MetricCascade(AbsoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequenceExists, Name), getMovieMatchMetric(), OriginalFileName);
} }
} }