mirror of
https://github.com/mitb-archive/filebot
synced 2024-12-23 08:18:52 -05:00
Experiment with SAMI subtitles
This commit is contained in:
parent
6961b25ad3
commit
3ac78751b6
97
source/net/filebot/subtitle/SamiReader.java
Normal file
97
source/net/filebot/subtitle/SamiReader.java
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
package net.filebot.subtitle;
|
||||||
|
|
||||||
|
import static java.util.stream.Collectors.*;
|
||||||
|
import static net.filebot.Logging.*;
|
||||||
|
import static net.filebot.similarity.Normalization.*;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
public class SamiReader {
|
||||||
|
|
||||||
|
public List<SubtitleElement> decode(CharSequence file) {
|
||||||
|
List<SubtitleElement> subtitles = new ArrayList<SubtitleElement>();
|
||||||
|
|
||||||
|
Matcher matcher = Pattern.compile("<SYNC(.*?)>", Pattern.CASE_INSENSITIVE).matcher(file);
|
||||||
|
|
||||||
|
long previousSyncStart = -1;
|
||||||
|
long previousSyncEnd = -1;
|
||||||
|
int previousSequenceEnd = -1;
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
Element sync = Jsoup.parseBodyFragment(matcher.group()).select("sync").first();
|
||||||
|
|
||||||
|
long nextSyncStart = getLongAttribute(sync, "start");
|
||||||
|
long nextSyncEnd = getLongAttribute(sync, "end");
|
||||||
|
|
||||||
|
if (previousSequenceEnd > 0) {
|
||||||
|
// use Start time of the next subtitle element as End time of the previous one by default
|
||||||
|
if (previousSyncEnd < 0) {
|
||||||
|
previousSyncEnd = nextSyncStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
SubtitleElement subtitle = getSubtitle(previousSyncStart, previousSyncEnd, file.subSequence(previousSequenceEnd, matcher.start()));
|
||||||
|
if (subtitle != null) {
|
||||||
|
subtitles.add(subtitle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nextSyncStart >= 0) {
|
||||||
|
previousSyncStart = nextSyncStart;
|
||||||
|
previousSyncEnd = nextSyncEnd;
|
||||||
|
previousSequenceEnd = matcher.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// last element if any
|
||||||
|
if (previousSequenceEnd > 0) {
|
||||||
|
// if end time is not known, then just set subtitle duration to 2 seconds
|
||||||
|
if (previousSyncEnd < 0) {
|
||||||
|
previousSyncEnd = previousSyncStart + 2000;
|
||||||
|
}
|
||||||
|
|
||||||
|
SubtitleElement subtitle = getSubtitle(previousSyncStart, previousSyncEnd, file.subSequence(previousSequenceEnd, file.length()));
|
||||||
|
if (subtitle != null) {
|
||||||
|
subtitles.add(subtitle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return subtitles;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SubtitleElement getSubtitle(long start, long end, CharSequence fragment) {
|
||||||
|
if (start >= 0 && end >= 0) {
|
||||||
|
Document document = Jsoup.parseBodyFragment(fragment.toString());
|
||||||
|
String text = document.select("p").stream().map(p -> p.text()).map(s -> replaceSpace(s, " ")).filter(s -> s.length() > 0).collect(joining("\n")).trim();
|
||||||
|
|
||||||
|
if (text.length() > 0) {
|
||||||
|
return new SubtitleElement(start, end, text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getLongAttribute(Element node, String key) {
|
||||||
|
if (node != null) {
|
||||||
|
String value = node.attr(key);
|
||||||
|
|
||||||
|
if (value.length() > 0) {
|
||||||
|
try {
|
||||||
|
return Long.parseLong(value);
|
||||||
|
} catch (Exception e) {
|
||||||
|
debug.warning(cause(e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user