mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-11 05:48:01 -05:00
* fine-tune subtitle matching
@see https://www.filebot.net/forums/viewtopic.php?f=8&t=2869
This commit is contained in:
parent
e6eef706e4
commit
bc7cf8cba0
@ -660,7 +660,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||||||
private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
|
private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
|
||||||
private static final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
|
private static final Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove");
|
||||||
|
|
||||||
protected static String normalizeObject(Object object) {
|
public static String normalizeObject(Object object) {
|
||||||
if (object == null) {
|
if (object == null) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
@ -1,62 +1,53 @@
|
|||||||
|
|
||||||
package net.filebot.similarity;
|
package net.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
import static java.lang.Math.*;
|
import static java.lang.Math.*;
|
||||||
import static net.filebot.similarity.CommonSequenceMatcher.*;
|
import static net.filebot.similarity.CommonSequenceMatcher.*;
|
||||||
import static net.filebot.similarity.Normalization.*;
|
import static net.filebot.similarity.Normalization.*;
|
||||||
|
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
|
|
||||||
public class SequenceMatchSimilarity implements SimilarityMetric {
|
public class SequenceMatchSimilarity implements SimilarityMetric {
|
||||||
|
|
||||||
private final CommonSequenceMatcher commonSequenceMatcher;
|
private final CommonSequenceMatcher commonSequenceMatcher;
|
||||||
|
|
||||||
|
|
||||||
public SequenceMatchSimilarity() {
|
public SequenceMatchSimilarity() {
|
||||||
this(10, false);
|
this(10, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public SequenceMatchSimilarity(int commonSequenceMaxStartIndex, boolean returnFirstMatch) {
|
public SequenceMatchSimilarity(int commonSequenceMaxStartIndex, boolean returnFirstMatch) {
|
||||||
this.commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(Locale.ROOT), commonSequenceMaxStartIndex, returnFirstMatch);
|
this.commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(Locale.ROOT), commonSequenceMaxStartIndex, returnFirstMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
String s1 = normalize(o1);
|
String s1 = normalize(o1);
|
||||||
String s2 = normalize(o2);
|
String s2 = normalize(o2);
|
||||||
|
|
||||||
// match common word sequence
|
// match common word sequence
|
||||||
String match = match(s1, s2);
|
String match = match(s1, s2);
|
||||||
if (match == null)
|
if (match == null || match.isEmpty())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return similarity(match, s1, s2);
|
return similarity(match, s1, s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected float similarity(String match, String s1, String s2) {
|
protected float similarity(String match, String s1, String s2) {
|
||||||
return (float) match.length() / min(s1.length(), s2.length());
|
return (float) match.length() / min(s1.length(), s2.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected String normalize(Object object) {
|
protected String normalize(Object object) {
|
||||||
// use string representation
|
// use string representation
|
||||||
String name = object.toString();
|
String name = object.toString();
|
||||||
|
|
||||||
// normalize separators
|
// normalize separators
|
||||||
name = normalizePunctuation(name);
|
name = normalizePunctuation(name);
|
||||||
|
|
||||||
// normalize case and trim
|
// normalize case and trim
|
||||||
return name.trim().toLowerCase();
|
return name.trim().toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected String match(String s1, String s2) {
|
protected String match(String s1, String s2) {
|
||||||
return commonSequenceMatcher.matchFirstCommonSequence(s1, s2);
|
return commonSequenceMatcher.matchFirstCommonSequence(s1, s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ import static net.filebot.util.FileUtilities.*;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.WeakHashMap;
|
import java.util.WeakHashMap;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
@ -19,6 +20,7 @@ import net.filebot.media.MetaAttributes;
|
|||||||
import net.filebot.mediainfo.MediaInfo;
|
import net.filebot.mediainfo.MediaInfo;
|
||||||
import net.filebot.mediainfo.MediaInfo.StreamKind;
|
import net.filebot.mediainfo.MediaInfo.StreamKind;
|
||||||
import net.filebot.similarity.CrossPropertyMetric;
|
import net.filebot.similarity.CrossPropertyMetric;
|
||||||
|
import net.filebot.similarity.EpisodeMetrics;
|
||||||
import net.filebot.similarity.MetricAvg;
|
import net.filebot.similarity.MetricAvg;
|
||||||
import net.filebot.similarity.MetricCascade;
|
import net.filebot.similarity.MetricCascade;
|
||||||
import net.filebot.similarity.NameSimilarityMetric;
|
import net.filebot.similarity.NameSimilarityMetric;
|
||||||
@ -68,6 +70,54 @@ public enum SubtitleMetrics implements SimilarityMetric {
|
|||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
|
// Binary metric: 1 if any pair of identifiers from the two objects shares a non-empty
// common sequence, 0 otherwise
NameSubstringSequenceExists(new SequenceMatchSimilarity() {

	/**
	 * Compares every normalized identifier of {@code o1} with every normalized identifier
	 * of {@code o2} and returns {@code 1} as soon as one pair matches, otherwise {@code 0}.
	 */
	@Override
	public float getSimilarity(Object o1, Object o2) {
		String[] f1 = getNormalizedEffectiveIdentifiers(o1);
		String[] f2 = getNormalizedEffectiveIdentifiers(o2);

		for (String s1 : f1) {
			for (String s2 : f2) {
				if (super.getSimilarity(s1, s2) >= 1) {
					return 1;
				}
			}
		}

		return 0;
	}

	/**
	 * Collapses the score to existence only: {@code 1} for a non-empty match, else {@code 0}.
	 */
	@Override
	protected float similarity(String match, String s1, String s2) {
		return match.length() > 0 ? 1 : 0;
	}

	/**
	 * Returns the string representation unchanged; the values passed in by
	 * {@link #getSimilarity(Object, Object)} have already been run through
	 * {@code EpisodeMetrics.normalizeObject}.
	 */
	@Override
	protected String normalize(Object object) {
		return object.toString();
	}

	/**
	 * Applies {@code EpisodeMetrics.normalizeObject} to each effective identifier of the object.
	 *
	 * @return one normalized name per identifier, in the same order (empty if there are none)
	 */
	protected String[] getNormalizedEffectiveIdentifiers(Object object) {
		List<?> identifiers = getEffectiveIdentifiers(object);
		String[] names = new String[identifiers.size()];

		for (int i = 0; i < names.length; i++) {
			names[i] = EpisodeMetrics.normalizeObject(identifiers.get(i));
		}

		return names;
	}

	/**
	 * Selects the values of an object that take part in the comparison.
	 *
	 * @return the descriptor name for an {@code OpenSubtitlesSubtitleDescriptor}, the result
	 *         of {@code listPathTail(file, 2, true)} for a {@link File} (presumably the last
	 *         two path elements - TODO confirm against FileUtilities), or an empty list for
	 *         any other type
	 */
	protected List<?> getEffectiveIdentifiers(Object object) {
		if (object instanceof OpenSubtitlesSubtitleDescriptor) {
			return singletonList(((OpenSubtitlesSubtitleDescriptor) object).getName());
		} else if (object instanceof File) {
			return listPathTail((File) object, 2, true);
		}
		return emptyList();
	}
}),
|
||||||
|
|
||||||
OriginalFileName(new SequenceMatchSimilarity() {
|
OriginalFileName(new SequenceMatchSimilarity() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -175,11 +225,11 @@ public enum SubtitleMetrics implements SimilarityMetric {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static SimilarityMetric[] defaultSequence() {
|
public static SimilarityMetric[] defaultSequence() {
|
||||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, OriginalFileName, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, FileName, DiskNumber, VideoProperties, new NameSimilarityMetric() };
|
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, OriginalFileName, NameSubstringSequenceExists, new MetricAvg(NameSubstringSequenceExists, Name), Numeric, FileName, DiskNumber, VideoProperties, new NameSimilarityMetric() };
|
||||||
}
|
}
|
||||||
|
|
||||||
public static SimilarityMetric verificationMetric() {
|
public static SimilarityMetric verificationMetric() {
|
||||||
return new MetricCascade(AbsoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name), getMovieMatchMetric(), OriginalFileName);
|
return new MetricCascade(AbsoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequenceExists, Name), getMovieMatchMetric(), OriginalFileName);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user