mirror of
https://github.com/mitb-archive/filebot
synced 2024-08-13 17:03:45 -04:00
* fine-tune subtitle auto-selection
This commit is contained in:
parent
3e4da0f254
commit
1c928e5592
156
source/net/filebot/subtitle/SubtitleMetrics.java
Normal file
156
source/net/filebot/subtitle/SubtitleMetrics.java
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
package net.filebot.subtitle;
|
||||||
|
|
||||||
|
import static java.util.Collections.*;
|
||||||
|
import static net.filebot.media.MediaDetection.*;
|
||||||
|
import static net.filebot.similarity.EpisodeMetrics.*;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.WeakHashMap;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import net.filebot.mediainfo.MediaInfo;
|
||||||
|
import net.filebot.mediainfo.MediaInfo.StreamKind;
|
||||||
|
import net.filebot.similarity.CrossPropertyMetric;
|
||||||
|
import net.filebot.similarity.EpisodeMetrics;
|
||||||
|
import net.filebot.similarity.MetricAvg;
|
||||||
|
import net.filebot.similarity.MetricCascade;
|
||||||
|
import net.filebot.similarity.NameSimilarityMetric;
|
||||||
|
import net.filebot.similarity.NumericSimilarityMetric;
|
||||||
|
import net.filebot.similarity.SimilarityMetric;
|
||||||
|
import net.filebot.web.OpenSubtitlesSubtitleDescriptor;
|
||||||
|
import net.filebot.web.SubtitleDescriptor;
|
||||||
|
|
||||||
|
public enum SubtitleMetrics implements SimilarityMetric {
|
||||||
|
|
||||||
|
// subtitle verification metric specifically excluding SxE mismatches
|
||||||
|
AbsoluteSeasonEpisode(new SimilarityMetric() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
|
float f = SeasonEpisode.getSimilarity(o1, o2);
|
||||||
|
if (f == 0 && (getEpisodeIdentifier(o1.toString(), true) == null) == (getEpisodeIdentifier(o2.toString(), true) == null)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return f < 1 ? -1 : 1;
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
|
||||||
|
DiskNumber(new NumericSimilarityMetric() {
|
||||||
|
|
||||||
|
private final Pattern CDNO = Pattern.compile("(?:CD|DISK)(\\d+)", Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
|
int c1 = getDiskNumber(o1);
|
||||||
|
int c2 = getDiskNumber(o2);
|
||||||
|
|
||||||
|
if (c1 == 0 && c2 == 0) // undefined
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return c1 == c2 ? 1 : -1; // positive or negative match
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getDiskNumber(Object o) {
|
||||||
|
int cd = 0;
|
||||||
|
Matcher matcher = CDNO.matcher(o.toString());
|
||||||
|
while (matcher.find()) {
|
||||||
|
cd = Integer.parseInt(matcher.group(1));
|
||||||
|
}
|
||||||
|
return cd;
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
|
||||||
|
VideoProperties(new CrossPropertyMetric() {
|
||||||
|
|
||||||
|
private final String FPS = "FPS";
|
||||||
|
private final String SECONDS = "SECS";
|
||||||
|
|
||||||
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
|
return o1 instanceof SubtitleDescriptor ? super.getSimilarity(o1, o2) : super.getSimilarity(o2, o1); // make sure that SubtitleDescriptor is o1
|
||||||
|
};
|
||||||
|
|
||||||
|
protected Map<String, Object> getProperties(Object object) {
|
||||||
|
if (object instanceof OpenSubtitlesSubtitleDescriptor) {
|
||||||
|
return getSubtitleProperties((OpenSubtitlesSubtitleDescriptor) object);
|
||||||
|
} else if (object instanceof File) {
|
||||||
|
return getVideoProperties((File) object);
|
||||||
|
}
|
||||||
|
return emptyMap();
|
||||||
|
};
|
||||||
|
|
||||||
|
private Map<String, Object> getSubtitleProperties(OpenSubtitlesSubtitleDescriptor subtitle) {
|
||||||
|
Map<String, Object> props = new HashMap<String, Object>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
float fps = Math.round(subtitle.getMovieFPS()); // round because most FPS values in the database are bad anyway
|
||||||
|
if (fps > 0) {
|
||||||
|
props.put(FPS, fps);
|
||||||
|
}
|
||||||
|
long seconds = (long) Math.floor(subtitle.getMovieTimeMS() / (double) 1000);
|
||||||
|
if (seconds > 0) {
|
||||||
|
props.put(SECONDS, seconds);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
return props;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final Map<File, Map<String, Object>> mediaInfoCache = new WeakHashMap<File, Map<String, Object>>(64);
|
||||||
|
|
||||||
|
private Map<String, Object> getVideoProperties(File file) {
|
||||||
|
synchronized (mediaInfoCache) {
|
||||||
|
return mediaInfoCache.computeIfAbsent(file, (f) -> {
|
||||||
|
try {
|
||||||
|
Map<String, Object> props = new HashMap<String, Object>();
|
||||||
|
|
||||||
|
MediaInfo mediaInfo = new MediaInfo();
|
||||||
|
if (mediaInfo.open(file)) {
|
||||||
|
float fps = Math.round(Float.parseFloat(mediaInfo.get(StreamKind.Video, 0, "FrameRate")));
|
||||||
|
if (fps > 0) {
|
||||||
|
props.put(FPS, fps);
|
||||||
|
}
|
||||||
|
long seconds = (long) Math.floor(Long.parseLong(mediaInfo.get(StreamKind.Video, 0, "Duration")) / (double) 1000);
|
||||||
|
if (seconds > 0) {
|
||||||
|
props.put(SECONDS, seconds);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return props;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return emptyMap();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// inner metric
|
||||||
|
private final SimilarityMetric metric;
|
||||||
|
|
||||||
|
private SubtitleMetrics(SimilarityMetric metric) {
|
||||||
|
this.metric = metric;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
|
return metric.getSimilarity(o1, o2);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SimilarityMetric[] defaultSequence() {
|
||||||
|
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, FileName, DiskNumber, VideoProperties, new NameSimilarityMetric() };
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SimilarityMetric verificationMetric() {
|
||||||
|
return EpisodeMetrics.verificationMetric();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SimilarityMetric sanityMetric() {
|
||||||
|
return new MetricCascade(AbsoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name), getMovieMatchMetric());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -4,7 +4,6 @@ import static java.lang.Math.*;
|
|||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
import static net.filebot.MediaTypes.*;
|
import static net.filebot.MediaTypes.*;
|
||||||
import static net.filebot.media.MediaDetection.*;
|
import static net.filebot.media.MediaDetection.*;
|
||||||
import static net.filebot.similarity.EpisodeMetrics.*;
|
|
||||||
import static net.filebot.similarity.Normalization.*;
|
import static net.filebot.similarity.Normalization.*;
|
||||||
import static net.filebot.util.FileUtilities.*;
|
import static net.filebot.util.FileUtilities.*;
|
||||||
|
|
||||||
@ -35,7 +34,6 @@ import net.filebot.similarity.EpisodeMetrics;
|
|||||||
import net.filebot.similarity.Match;
|
import net.filebot.similarity.Match;
|
||||||
import net.filebot.similarity.Matcher;
|
import net.filebot.similarity.Matcher;
|
||||||
import net.filebot.similarity.MetricAvg;
|
import net.filebot.similarity.MetricAvg;
|
||||||
import net.filebot.similarity.MetricCascade;
|
|
||||||
import net.filebot.similarity.NameSimilarityMetric;
|
import net.filebot.similarity.NameSimilarityMetric;
|
||||||
import net.filebot.similarity.SequenceMatchSimilarity;
|
import net.filebot.similarity.SequenceMatchSimilarity;
|
||||||
import net.filebot.similarity.SimilarityMetric;
|
import net.filebot.similarity.SimilarityMetric;
|
||||||
@ -139,11 +137,11 @@ public final class SubtitleUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// add other possible matches to the options
|
// add other possible matches to the options
|
||||||
SimilarityMetric sanity = EpisodeMetrics.verificationMetric();
|
SimilarityMetric sanity = SubtitleMetrics.verificationMetric();
|
||||||
float minMatchSimilarity = strict ? 0.9f : 0.6f;
|
float minMatchSimilarity = strict ? 0.9f : 0.6f;
|
||||||
|
|
||||||
// first match everything as best as possible, then filter possibly bad matches
|
// first match everything as best as possible, then filter possibly bad matches
|
||||||
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles, false).entrySet()) {
|
for (Entry<File, SubtitleDescriptor> it : matchSubtitles(files, subtitles).entrySet()) {
|
||||||
if (sanity.getSimilarity(it.getKey(), it.getValue()) >= minMatchSimilarity) {
|
if (sanity.getSimilarity(it.getKey(), it.getValue()) >= minMatchSimilarity) {
|
||||||
subtitlesByFile.get(it.getKey()).add(it.getValue());
|
subtitlesByFile.get(it.getKey()).add(it.getValue());
|
||||||
}
|
}
|
||||||
@ -178,31 +176,20 @@ public final class SubtitleUtilities {
|
|||||||
return subtitlesByFile;
|
return subtitlesByFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles, boolean strict) throws InterruptedException {
|
public static Map<File, SubtitleDescriptor> matchSubtitles(Collection<File> files, Collection<SubtitleDescriptor> subtitles) throws InterruptedException {
|
||||||
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
||||||
|
|
||||||
// optimize for generic media <-> subtitle matching
|
// optimize for generic media <-> subtitle matching
|
||||||
SimilarityMetric[] metrics = new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, new NameSimilarityMetric() };
|
SimilarityMetric[] metrics = SubtitleMetrics.defaultSequence();
|
||||||
|
|
||||||
// subtitle verification metric specifically excluding SxE mismatches
|
|
||||||
SimilarityMetric absoluteSeasonEpisode = new SimilarityMetric() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
|
||||||
float f = SeasonEpisode.getSimilarity(o1, o2);
|
|
||||||
if (f == 0 && (getEpisodeIdentifier(o1.toString(), true) == null) == (getEpisodeIdentifier(o2.toString(), true) == null)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return f < 1 ? -1 : 1;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
SimilarityMetric sanity = new MetricCascade(absoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name), getMovieMatchMetric());
|
|
||||||
|
|
||||||
// first match everything as best as possible, then filter possibly bad matches
|
// first match everything as best as possible, then filter possibly bad matches
|
||||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
||||||
|
|
||||||
|
SimilarityMetric sanity = SubtitleMetrics.sanityMetric();
|
||||||
|
float minSanitySimilarity = 0.1f;
|
||||||
|
|
||||||
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
for (Match<File, SubtitleDescriptor> it : matcher.match()) {
|
||||||
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= (strict ? 0.9f : 0.6f)) {
|
if (sanity.getSimilarity(it.getValue(), it.getCandidate()) >= minSanitySimilarity) {
|
||||||
subtitleByVideo.put(it.getValue(), it.getCandidate());
|
subtitleByVideo.put(it.getValue(), it.getCandidate());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,7 +237,7 @@ public final class SubtitleUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return matchSubtitles(singleton(file), subtitles, strict).entrySet().iterator().next().getValue();
|
return matchSubtitles(singleton(file), subtitles).entrySet().iterator().next().getValue();
|
||||||
} catch (NoSuchElementException e) {
|
} catch (NoSuchElementException e) {
|
||||||
return null;
|
return null;
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
@ -96,6 +96,22 @@ public class OpenSubtitlesSubtitleDescriptor implements SubtitleDescriptor, Seri
|
|||||||
return Integer.parseInt(getProperty(Property.QueryNumber));
|
return Integer.parseInt(getProperty(Property.QueryNumber));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public float getMovieFPS() {
|
||||||
|
return Float.parseFloat(getProperty(Property.MovieFPS));
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getMovieTimeMS() {
|
||||||
|
return Long.parseLong(getProperty(Property.MovieTimeMS));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getSubActualCD() {
|
||||||
|
return Integer.parseInt(getProperty(Property.SubActualCD));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getSubSumCD() {
|
||||||
|
return Integer.parseInt(getProperty(Property.SubSumCD));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ByteBuffer fetch() throws Exception {
|
public ByteBuffer fetch() throws Exception {
|
||||||
URL resource = new URL(getProperty(Property.SubDownloadLink));
|
URL resource = new URL(getProperty(Property.SubDownloadLink));
|
||||||
|
Loading…
Reference in New Issue
Block a user