// filebot/source/net/filebot/media/AutoDetection.java
//
// 523 lines
// 16 KiB
// Java

package net.filebot.media;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static java.util.regex.Pattern.*;
import static java.util.stream.Collectors.*;
import static net.filebot.Logging.*;
import static net.filebot.MediaTypes.*;
import static net.filebot.WebServices.*;
import static net.filebot.format.ExpressionFormatMethods.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.media.XattrMetaInfo.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import static net.filebot.web.EpisodeUtilities.*;
import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import net.filebot.similarity.NameSimilarityMetric;
import net.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.filebot.util.FastFile;
import net.filebot.web.Episode;
import net.filebot.web.Movie;
public class AutoDetection {
// files under analysis; the constructor wraps every input file in a FastFile
private File[] files;
// locale handed to the series name and movie lookups
private Locale locale;
public AutoDetection(Collection<File> root, boolean resolve, Locale locale) {
this.locale = locale;
// require a set of distinct files
this.files = root.stream().sorted().distinct().map(FastFile::new).toArray(File[]::new);
// resolve folders if required
if (resolve) {
this.files = resolve(stream(files), getSystemFilesFilter()).toArray(File[]::new);
}
}
/**
 * Recursively expands the given entries into the files to analyze.
 *
 * Hidden entries and entries accepted by {@code excludes} are dropped. Regular files are kept as they are.
 * A directory is kept as a single entry if {@code isDiskFolder} accepts it, otherwise its children are
 * resolved recursively. Anything that is neither a file nor a directory is dropped.
 *
 * @param root     entries to expand
 * @param excludes filter selecting the entries that must be skipped
 * @return lazily evaluated stream of the resolved entries
 */
protected Stream<File> resolve(Stream<File> root, FileFilter excludes) {
    return root.flatMap(entry -> {
        boolean skip = entry.isHidden() || excludes.accept(entry);
        if (!skip) {
            if (entry.isFile()) {
                return Stream.of(entry);
            }
            if (entry.isDirectory()) {
                if (isDiskFolder(entry)) {
                    return Stream.of(entry);
                }
                return resolve(getChildren(entry).stream(), excludes);
            }
        }
        return Stream.empty();
    });
}
/**
 * @return read-only list view of the files under analysis
 */
public List<File> getFiles() {
    List<File> view = asList(files);
    return unmodifiableList(view);
}
// folder name that marks its content as movies (matched against whole folder names by anyMatch)
private static final Pattern MOVIE_FOLDER_PATTERN = compile("Movies", CASE_INSENSITIVE);
// folder names that mark their content as series: "TV Shows", "TV Series" or "Season N" with any single separator character
private static final Pattern SERIES_FOLDER_PATTERN = compile("TV.Shows|TV.Series|Season.\\d+", CASE_INSENSITIVE);
// folder name that marks its content as anime
private static final Pattern ANIME_FOLDER_PATTERN = compile("Anime", CASE_INSENSITIVE);
// "E" or "EP" followed by 1 to 3 digits, not directly surrounded by letters or digits (e.g. E01, EP123)
private static final Pattern ABSOLUTE_EPISODE_PATTERN = compile("(?<!\\p{Alnum})E[P]?\\d{1,3}(?!\\p{Alnum})", CASE_INSENSITIVE);
// "tvs" / "tvp", "Season" plus 1 to 2 digits, or 4 digits followed by any character and "Snn"; not directly surrounded by letters or digits
private static final Pattern SERIES_EPISODE_PATTERN = compile("(?<!\\p{Alnum})(tv[sp]|Season\\D?\\d{1,2}|\\d{4}.S\\d{2})(?!\\p{Alnum})", CASE_INSENSITIVE);
// name that starts with a bracketed tag ending in "Subs", e.g. "[SomeSubs]"
private static final Pattern ANIME_EPISODE_PATTERN = compile("^\\[[^\\]]+Subs\\]", CASE_INSENSITIVE);
// audio language values treated as Japanese
private static final Pattern JAPANESE_AUDIO_LANGUAGE_PATTERN = compile("jpn|Japanese", CASE_INSENSITIVE);
// subtitle codec values (ASS / SSA) that isAnime checks together with Japanese audio
private static final Pattern JAPANESE_SUBTITLE_CODEC_PATTERN = compile("ASS|SSA", CASE_INSENSITIVE);
/**
 * @param f file to check
 * @return {@code true} if the audio file filter accepts the file and the video file filter does not
 */
public boolean isMusic(File f) {
    if (!AUDIO_FILES.accept(f)) {
        return false;
    }
    return !VIDEO_FILES.accept(f);
}
/**
 * @param f file to check
 * @return {@code true} if a parent folder matches {@code MOVIE_FOLDER_PATTERN} or if
 *         {@code MediaDetection.isMovie(f, true)} accepts the file
 */
public boolean isMovie(File f) {
    if (anyMatch(f.getParentFile(), MOVIE_FOLDER_PATTERN)) {
        return true;
    }
    return MediaDetection.isMovie(f, true);
}
/**
 * Checks whether the file looks like a (non-anime) series episode.
 *
 * The checks run in this order: series folder name, series marker in the path, strict episode
 * detection on the path, and finally the metadata stored in the file's extended attributes.
 *
 * @param f file to check
 * @return {@code true} if any of the checks identifies the file as an episode
 */
public boolean isEpisode(File f) {
    boolean matchesPath = anyMatch(f.getParentFile(), SERIES_FOLDER_PATTERN)
            || find(f.getPath(), SERIES_EPISODE_PATTERN)
            || MediaDetection.isEpisode(f.getPath(), true);
    if (matchesPath) {
        return true;
    }

    // return true for known non-Anime Episode objects
    Object metaInfo = xattr.getMetaInfo(f);
    return metaInfo instanceof Episode && !isInstance(AniDB, (Episode) metaInfo);
}
/**
 * Checks whether the file looks like an anime episode.
 *
 * A file without (non-strict) episode numbers in its name is never anime. Otherwise the file is
 * accepted if a parent folder matches {@code ANIME_FOLDER_PATTERN}, or if its name matches
 * {@code ANIME_EPISODE_PATTERN} or {@code EMBEDDED_CHECKSUM}. Video files are then judged by their
 * media characteristics; if those cannot be read, the extended attribute metadata decides.
 *
 * @param f file to check
 * @return {@code true} if the file is classified as anime
 */
public boolean isAnime(File f) {
    String name = f.getName();

    if (!findEpisodeNumbers(name, false)) {
        return false;
    }

    boolean obvious = anyMatch(f.getParentFile(), ANIME_FOLDER_PATTERN) || find(name, ANIME_EPISODE_PATTERN) || find(name, EMBEDDED_CHECKSUM);
    if (obvious) {
        return true;
    }

    if (MediaCharacteristicsParser.DEFAULT.acceptVideoFile(f)) {
        // check for Japanese audio or characteristic subtitles
        try (MediaCharacteristics mi = MediaCharacteristicsParser.DEFAULT.open(f)) {
            // NOTE(review): any video shorter than 60 minutes is accepted here without looking at
            // audio or subtitles (|| binds weaker than &&) -- confirm that this is intended
            if (mi.getDuration().toMinutes() < 60) {
                return true;
            }
            return find(mi.getAudioLanguage(), JAPANESE_AUDIO_LANGUAGE_PATTERN) && find(mi.getSubtitleCodec(), JAPANESE_SUBTITLE_CODEC_PATTERN);
        } catch (Exception e) {
            // best effort: fall through to the xattr check below
            debug.warning("Failed to read audio language: " + e.getMessage());
        }
    }

    Object metaInfo = xattr.getMetaInfo(f);
    return metaInfo instanceof Episode && isInstance(AniDB, (Episode) metaInfo);
}
/**
 * Checks whether the given text contains episode numbers.
 *
 * In strict mode any match counts. In non-strict mode a match only counts if its season is not
 * 19 or 20 (presumably because those are the leading digits of a year -- TODO confirm); if every
 * non-strict match is ruled out that way, the text is checked once more in strict mode.
 *
 * @param s      text to parse
 * @param strict passed on to {@code parseEpisodeNumber}
 * @return {@code true} if episode numbers were found
 */
private boolean findEpisodeNumbers(String s, boolean strict) {
    List<SxE> matches = parseEpisodeNumber(s, strict);
    boolean found = matches != null && !matches.isEmpty();

    // no matches at all, or strict mode where any match is good enough
    if (!found || strict) {
        return found;
    }

    for (SxE match : matches) {
        if (match.season < 19 || match.season > 20) {
            return true;
        }
    }
    return findEpisodeNumbers(s, true);
}
/**
 * Walks from the given file up through its parents and checks each name against the pattern.
 * The walk stops at the first volume root, which is not checked itself.
 *
 * @param file    starting point; may be {@code null}
 * @param pattern pattern that must match an entire file or folder name
 * @return {@code true} if any visited name matches
 */
public boolean anyMatch(File file, Pattern pattern) {
    // episode characteristics override movie characteristics (e.g. episodes in ~/Movies folder which is considered a volume root)
    File current = file;
    while (current != null && !isVolumeRoot(current)) {
        if (pattern.matcher(current.getName()).matches()) {
            return true;
        }
        current = current.getParentFile();
    }
    return false;
}
/**
 * Detects the group of every file, one file after the other.
 *
 * Files whose detection fails are logged and left out of the result.
 *
 * @return detected groups mapped to their files, both in encounter order
 */
public Map<Group, Set<File>> group() {
    Map<Group, Set<File>> result = new LinkedHashMap<Group, Set<File>>();

    for (File file : files) {
        try {
            Set<File> members = result.computeIfAbsent(detectGroup(file), key -> new LinkedHashSet<File>());
            // use FastFile internally but do not expose to outside code that expects File objects
            members.add(new File(file.getPath()));
        } catch (Exception e) {
            debug.log(Level.SEVERE, e, e::toString);
        }
    }

    return result;
}
/**
 * Detects the group of every file using the given thread pool.
 *
 * One detection task per file is submitted up front; the results are then collected in file
 * order on the calling thread. Files whose detection fails are logged and left out of the result.
 * If the calling thread is interrupted while waiting, the interrupt status is restored so that
 * the caller can still observe it.
 *
 * @param threadPool executor that runs the detection tasks
 * @return detected groups mapped to their files, both in encounter order
 */
public Map<Group, Set<File>> groupParallel(ExecutorService threadPool) {
    Map<Group, Set<File>> groups = new LinkedHashMap<Group, Set<File>>();

    stream(files).collect(toMap(f -> f, f -> threadPool.submit(() -> detectGroup(f)), (a, b) -> a, LinkedHashMap::new)).forEach((file, group) -> {
        try {
            groups.computeIfAbsent(group.get(), k -> new LinkedHashSet<File>()).add(new File(file.getPath())); // use FastFile internally but do not expose to outside code that expects File objects
        } catch (InterruptedException e) {
            // do not swallow the interrupt: restore the flag for the caller
            Thread.currentThread().interrupt();
            debug.log(Level.SEVERE, e.getMessage(), e);
        } catch (Exception e) {
            debug.log(Level.SEVERE, e.getMessage(), e);
        }
    });

    return groups;
}
/**
 * Classifies a single file.
 *
 * The cheap checks run first, in this order: music, movie, episode, anime, absolute episode
 * number in the file name. If none applies, movie and series candidates are both looked up and,
 * when both exist, {@code Rules} decides between them.
 *
 * @param f file to classify
 * @return the detected group; empty if neither movie nor series candidates were found
 * @throws Exception if one of the lookups fails
 */
private Group detectGroup(File f) throws Exception {
    Group group = new Group();

    if (isMusic(f)) {
        return group.music(f);
    }
    if (isMovie(f)) {
        return group.movie(getMovieMatches(f));
    }
    if (isEpisode(f)) {
        return group.series(getSeriesMatches(f, false));
    }
    if (isAnime(f)) {
        return group.anime(getSeriesMatches(f, true));
    }

    // ignore movie matches if filename looks like an episode
    if (find(f.getName(), ABSOLUTE_EPISODE_PATTERN)) {
        return group.series(getSeriesMatches(f, false));
    }

    // Movie VS Episode
    List<Movie> movies = getMovieMatches(f);
    List<String> series = getSeriesMatches(f, false);
    boolean noMovie = movies.isEmpty();
    boolean noSeries = series.isEmpty();

    if (noMovie && noSeries) {
        return group;
    }
    if (noMovie) {
        return group.series(series);
    }
    if (noSeries) {
        return group.movie(movies);
    }

    // both kinds of candidates exist: let the rules decide
    return new Rules(f, series, movies).apply();
}
/**
 * Looks up series names for the given file.
 *
 * If the file alone yields nothing, the lookup is repeated with all analyzed video files from
 * the same folder, provided there are at least 5 of them.
 *
 * @param f     file to look up
 * @param anime passed on to {@code detectSeriesNames}
 * @return detected series names, possibly empty
 * @throws Exception if the lookup fails
 */
private List<String> getSeriesMatches(File f, boolean anime) throws Exception {
    List<String> names = detectSeriesNames(singleton(f), anime, locale);
    if (!names.isEmpty()) {
        return names;
    }

    List<File> siblings = getVideoFiles(f.getParentFile());
    if (siblings.size() < 5) {
        return names;
    }
    return detectSeriesNames(siblings, anime, locale);
}
/**
 * Looks up movie candidates for the given file via {@code detectMovie} using TheMovieDB
 * and the locale of this detector.
 *
 * @param file file to look up
 * @return whatever {@code detectMovie} returns for the file
 * @throws Exception if the lookup fails
 */
private List<Movie> getMovieMatches(File file) throws Exception {
    List<Movie> matches = detectMovie(file, TheMovieDB, locale, false);
    return matches;
}
/**
 * Selects the analyzed files that sit directly in the given folder and pass the video file filter.
 *
 * @param parent folder whose direct children are wanted
 * @return matching files in their original order
 */
private List<File> getVideoFiles(File parent) {
    List<File> videos = new ArrayList<File>();
    for (File candidate : files) {
        if (parent.equals(candidate.getParentFile()) && VIDEO_FILES.accept(candidate)) {
            videos.add(candidate);
        }
    }
    return videos;
}
// a 4-digit number starting with 19 or 20, with a non-digit character on either side
private static final Pattern YEAR = compile("\\D(?:19|20)\\d{2}\\D");
// a standalone number of 1 to 3 digits
private static final Pattern EPISODE_NUMBERS = compile("\\b\\d{1,3}\\b");
// at most 3 characters, then whitespace, a hyphen, whitespace and at least one more character
private static final Pattern DASH = compile("^.{0,3}\\s[-]\\s.+$", UNICODE_CHARACTER_CLASS);
// two numbers of 1 to 2 digits each, separated by 1 to 3 non-digits and enclosed by non-digits
private static final Pattern NUMBER_PAIR = compile("\\D\\d{1,2}\\D{1,3}\\d{1,2}\\D");
// text that consists of letters, whitespace and punctuation only
private static final Pattern NON_NUMBER_NAME = compile("^[\\p{L}\\p{Space}\\p{Punct}]+$", UNICODE_CHARACTER_CLASS);
/**
 * Decides between a series and a movie interpretation for a file that produced both kinds of
 * candidates.
 *
 * Each rule carries a series score and a movie score. The rules are evaluated in a fixed order
 * and the scores of every passing rule are summed up; as soon as one side is positive while the
 * other side is negative, that side wins.
 */
private class Rules {

    // starts out with both the series and the movie candidate
    private final Group group;

    // file under test, first series name candidate and first movie candidate
    private final File f;
    private final String s;
    private final Movie m;

    // normalized names: dn = folder returned by guessMovieFolder, fn = file, sn = series, mn = movie,
    // asn = result of after(fn, snm), or fn if that is empty
    private final String dn, fn, sn, mn, asn;

    // literal patterns for the series name, the movie name and the movie year
    private final Pattern snm, mnm, mym;

    /**
     * @param file   file under test
     * @param series series name candidates; must not be empty
     * @param movie  movie candidates; must not be empty
     */
    public Rules(File file, List<String> series, List<Movie> movie) throws Exception {
        group = new Group().series(series).movie(movie);

        f = file;
        s = series.get(0);
        m = movie.get(0);

        dn = normalize(getName(guessMovieFolder(f)));
        fn = normalize(getName(f));
        sn = normalize(s);
        mn = normalize(m.getName());

        snm = compile(sn, LITERAL);
        mnm = compile(mn, LITERAL);
        mym = compile(Integer.toString(m.getYear()), LITERAL);

        asn = after(fn, snm).orElse(fn);
    }

    /**
     * Lower-cases the ASCII form of the given text with normalized punctuation and spacing.
     *
     * @return the normalized text, or an empty string for {@code null}
     */
    private String normalize(String self) {
        return self == null ? "" : replaceSpace(normalizePunctuation(ascii(self)).toLowerCase(), " ").trim();
    }

    private float getSimilarity(String self, String other) {
        return new NameSimilarityMetric().getSimilarity(self, other);
    }

    /**
     * @return {@code true} if the name contains a year and {@code matchMovieName} finds a movie for it
     */
    private boolean matchMovie(String name) {
        return find(name, YEAR) && !matchMovieName(singleton(name), true, 0).isEmpty();
    }

    /**
     * Evaluates the rules in order and stops as soon as a decision is reached.
     *
     * @return the group with the losing candidate cleared, or the group with both candidates
     *         if the rules do not reach a decision
     * @throws Exception if a rule fails
     */
    public Group apply() throws Exception {
        List<Rule> rules = new ArrayList<Rule>(15);
        rules.add(new Rule(-1, 0, this::equalsMovieName, "AutoDetection::equalsMovieName"));
        rules.add(new Rule(-1, 0, this::containsMovieYear, "AutoDetection::containsMovieYear"));
        rules.add(new Rule(-1, 0, this::containsMovieNameYear, "AutoDetection::containsMovieNameYear"));
        rules.add(new Rule(5, -1, this::containsEpisodeNumbers, "AutoDetection::containsEpisodeNumbers"));
        rules.add(new Rule(5, -1, this::commonNumberPattern, "AutoDetection::commonNumberPattern"));
        rules.add(new Rule(1, -1, this::episodeWithoutNumbers, "AutoDetection::episodeWithoutNumbers"));
        rules.add(new Rule(1, -1, this::episodeNumbers, "AutoDetection::episodeNumbers"));
        rules.add(new Rule(-1, 1, this::hasImdbId, "AutoDetection::hasImdbId"));
        rules.add(new Rule(-1, 1, this::nonNumberName, "AutoDetection::nonNumberName"));
        rules.add(new Rule(-1, 5, this::exactMovieMatch, "AutoDetection::exactMovieMatch"));
        rules.add(new Rule(-1, 1, this::containsMovieName, "AutoDetection::containsMovieName"));
        rules.add(new Rule(-1, 1, this::similarNameYear, "AutoDetection::similarNameYear"));
        rules.add(new Rule(-1, 1, this::similarNameNoNumbers, "AutoDetection::similarNameNoNumbers"));
        rules.add(new Rule(-1, 1, this::aliasNameMatch, "AutoDetection::aliasNameMatch"));

        int score_s = 0;
        int score_m = 0;

        for (Rule rule : rules) {
            if (rule.test()) {
                score_s += rule.s;
                score_m += rule.m;
                debug.finest(format("[+] %s", rule));

                // series wins: drop the movie candidate
                if (score_s >= 1 && score_m <= -1) {
                    debug.fine(format("[X] Rule as Series [s: %d, m: %d]", score_s, score_m));
                    return group.movie(null);
                }

                // movie wins: drop the series candidate
                if (score_m >= 1 && score_s <= -1) {
                    debug.fine(format("[X] Rule as Movie [s: %d, m: %d]", score_s, score_m));
                    return group.series(null);
                }
            } else {
                debug.finest(format("[-] %s", rule));
            }
        }

        // undecided: keep both candidates
        return group;
    }

    /** The normalized file name is exactly the normalized movie name. */
    public boolean equalsMovieName() {
        return mn.equals(fn);
    }

    /**
     * The movie year (1950 or later) occurs in one of the entries returned by
     * {@code listPathTail(f, 3, true)} and no episode numbers are found in what
     * {@code after} returns for that year.
     */
    public boolean containsMovieYear() {
        return m.getYear() >= 1950 && listPathTail(f, 3, true).stream().anyMatch(it -> {
            return after(it.getName(), mym).map(amy -> !findEpisodeNumbers(amy, false)).orElse(false);
        });
    }

    /**
     * The movie name contains the series name, and the folder or file name contains a year
     * for which {@code after} returns text without episode numbers.
     */
    public boolean containsMovieNameYear() {
        return find(mn, snm) && Stream.of(dn, fn).anyMatch(it -> {
            return after(it, YEAR).map(ay -> {
                return !findEpisodeNumbers(ay, false);
            }).orElse(false);
        });
    }

    /** The file name contains strict episode numbers or something {@code parseDate} accepts. */
    public boolean containsEpisodeNumbers() {
        return findEpisodeNumbers(fn, true) || parseDate(fn) != null;
    }

    /**
     * The video files in the same folder that are related to the series name (via the folder name
     * or their own name) yield at least 10 distinct, non-empty sets of standalone numbers.
     */
    public boolean commonNumberPattern() {
        return getChildren(f.getParentFile(), VIDEO_FILES).stream().filter(it -> {
            return find(dn, snm) || find(normalize(it.getName()), snm);
        }).map(it -> {
            return streamMatches(it.getName(), EPISODE_NUMBERS).map(Integer::parseInt).collect(toSet());
        }).filter(it -> it.size() > 0).distinct().count() >= 10;
    }

    /** {@code asn} matches the {@code DASH} pattern and the file name does not match a movie. */
    public boolean episodeWithoutNumbers() {
        return find(asn, DASH) && !matchMovie(fn);
    }

    /**
     * {@code asn} (release info stripped) contains episode numbers or a number pair, and the folder
     * or file name contains the series name without matching a movie.
     */
    public boolean episodeNumbers() {
        String n = stripReleaseInfo(asn, false);
        if (findEpisodeNumbers(n, false) || find(n, NUMBER_PAIR)) {
            return Stream.of(dn, fn).anyMatch(it -> find(it, snm) && !matchMovie(it));
        }
        return false;
    }

    /** {@code grepImdbId} finds at least one id in the file name. */
    public boolean hasImdbId() {
        return grepImdbId(fn).size() > 0;
    }

    /** The file name consists of letters, whitespace and punctuation only. */
    public boolean nonNumberName() {
        return find(getName(f), NON_NUMBER_NAME);
    }

    /** {@code detectMovieWithYear} returns at least one match for the file. */
    public boolean exactMovieMatch() throws Exception {
        List<Movie> matches = detectMovieWithYear(f, TheMovieDB, Locale.US, true);
        return matches != null && !matches.isEmpty();
    }

    /**
     * The file name contains the movie name, and no episode numbers are found in what
     * {@code after} returns for the movie name.
     */
    public boolean containsMovieName() {
        return fn.contains(mn) && !findEpisodeNumbers(after(fn, mnm).orElse(fn), false);
    }

    /**
     * The file name is very similar to the movie name, or the folder or file name contains a number
     * within one year of the movie year.
     */
    public boolean similarNameYear() {
        return getSimilarity(mn, fn) >= 0.8f || Stream.of(dn, fn).anyMatch(it -> {
            return matchIntegers(it).stream().filter(y -> m.getYear() - 1 <= y && y <= m.getYear() + 1).count() > 0;
        });
    }

    /**
     * The folder or file name contains the movie name, {@code EPISODE_NUMBERS} finds nothing in what
     * {@code after} returns for the movie name, and the name is more similar to the movie name than
     * to the series name by a margin of at least 0.2.
     */
    public boolean similarNameNoNumbers() {
        return Stream.of(dn, fn).anyMatch(it -> {
            return find(it, mnm) && !find(after(it, mnm).orElse(it), EPISODE_NUMBERS) && getSimilarity(it, mn) >= 0.2f + getSimilarity(it, sn);
        });
    }

    /** The file name contains one of the movie's normalized alias names of at least 5 characters. */
    public boolean aliasNameMatch() {
        return m.getEffectiveNamesWithoutYear().stream().map(this::normalize).filter(alias -> alias.length() >= 5).anyMatch(fn::contains);
    }
}
/**
 * A boolean check that is allowed to throw a checked exception.
 */
@FunctionalInterface
private interface Test {
// evaluates the check
boolean test() throws Exception;
}
/**
 * A named check together with the scores it contributes when it passes.
 */
private static class Rule implements Test {

    /** score added to the series side when the check passes */
    public final int s;

    /** score added to the movie side when the check passes */
    public final int m;

    private final Test condition;
    private final String label;

    /**
     * @param s    series score
     * @param m    movie score
     * @param t    check to run
     * @param name text returned by {@link #toString()}
     */
    public Rule(int s, int m, Test t, String name) {
        this.s = s;
        this.m = m;
        this.condition = t;
        this.label = name;
    }

    /** Runs the wrapped check. */
    @Override
    public boolean test() throws Exception {
        return condition.test();
    }

    /** Returns the name given at construction. */
    @Override
    public String toString() {
        return label;
    }
}
/**
 * Media kinds that a {@code Group} can hold a key for.
 */
public enum Type {
    Movie,
    Series,
    Anime,
    Music
}
/**
 * Grouping key for detected files: maps each media {@link Type} to the value that identifies
 * the group for that type. A type counts as present only if its value is not {@code null}.
 */
public static class Group extends EnumMap<Type, Object> {

    public Group() {
        super(Type.class);
    }

    /**
     * Sets the movie key from the first candidate, or clears it if there is none.
     */
    public Group movie(List<Movie> movies) {
        Movie key = null;
        if (movies != null && !movies.isEmpty()) {
            // group by Movie (rather than MoviePart)
            key = new Movie(movies.get(0));
        }
        put(Type.Movie, key);
        return this;
    }

    /**
     * Sets the series key from the first candidate name, or clears it if there is none.
     */
    public Group series(List<String> names) {
        put(Type.Series, firstNameKey(names));
        return this;
    }

    /**
     * Sets the anime key from the first candidate name, or clears it if there is none.
     */
    public Group anime(List<String> names) {
        put(Type.Anime, firstNameKey(names));
        return this;
    }

    /**
     * Sets the music key to the parent path of the given file, or clears it for {@code null}.
     */
    public Group music(File f) {
        String folder = null;
        if (f != null) {
            folder = f.getParent();
        }
        put(Type.Music, folder);
        return this;
    }

    public Object getMovie() {
        return get(Type.Movie);
    }

    public Object getSeries() {
        return get(Type.Series);
    }

    public Object getAnime() {
        return get(Type.Anime);
    }

    public Object getMusic() {
        return get(Type.Music);
    }

    public Group setMovie() {
        return mark(Type.Movie);
    }

    public Group setSeries() {
        return mark(Type.Series);
    }

    public Group setAnime() {
        return mark(Type.Anime);
    }

    public Group setMusic() {
        return mark(Type.Music);
    }

    public boolean isMovie() {
        return has(Type.Movie);
    }

    public boolean isSeries() {
        return has(Type.Series);
    }

    public boolean isAnime() {
        return has(Type.Anime);
    }

    public boolean isMusic() {
        return has(Type.Music);
    }

    /**
     * @return the types that currently have a non-null value, in enum order
     */
    public Type[] types() {
        List<Type> present = new ArrayList<Type>();
        for (Map.Entry<Type, Object> entry : entrySet()) {
            if (entry.getValue() != null) {
                present.add(entry.getKey());
            }
        }
        return present.toArray(new Type[0]);
    }

    // lower-cased first name with normalized punctuation and spacing, or null if there is no name
    private static String firstNameKey(List<String> names) {
        if (names == null || names.isEmpty()) {
            return null;
        }
        return replaceSpace(normalizePunctuation(names.get(0)).toLowerCase(), " ").trim();
    }

    // flags the given type as present without a specific key
    private Group mark(Type type) {
        put(type, Boolean.TRUE);
        return this;
    }

    // a type is present if it maps to a non-null value
    private boolean has(Type type) {
        return get(type) != null;
    }
}
}