filebot/source/net/filebot/media/AutoDetection.java

446 lines
14 KiB
Java

package net.filebot.media;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static java.util.regex.Pattern.*;
import static java.util.stream.Collectors.*;
import static net.filebot.Logging.*;
import static net.filebot.MediaTypes.*;
import static net.filebot.Settings.*;
import static net.filebot.WebServices.*;
import static net.filebot.format.ExpressionFormatMethods.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.media.XattrMetaInfo.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import java.io.File;
import java.io.FileFilter;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import net.filebot.mediainfo.MediaInfo;
import net.filebot.mediainfo.MediaInfo.StreamKind;
import net.filebot.similarity.NameSimilarityMetric;
import net.filebot.util.FastFile;
import net.filebot.web.Episode;
import net.filebot.web.Movie;
public class AutoDetection {
// Files to classify; assigned once in the constructor and read by getFiles(), group() and getVideoFiles().
private File[] files;
// Locale passed on to the series name and movie lookups.
private Locale locale;
/**
 * Creates a detector for the given files.
 *
 * @param root    files and / or folders to classify
 * @param resolve if {@code true}, every root is wrapped in a {@code FastFile} and expanded via
 *                {@link #resolve(Stream, FileFilter)} with the system files filter as exclusion
 *                filter; if {@code false}, the roots are used exactly as given
 * @param locale  locale passed on to the series name and movie lookups
 */
public AutoDetection(Collection<File> root, boolean resolve, Locale locale) {
    Stream<File> input;
    if (resolve) {
        input = resolve(root.stream().map(FastFile::new), getSystemFilesFilter());
    } else {
        input = root.stream();
    }

    this.files = input.toArray(File[]::new);
    this.locale = locale;
}
/**
 * Recursively expands the given roots into the entries that should be classified.
 * Hidden entries and entries accepted by {@code excludes} are dropped, regular files are
 * kept, disk folders are kept as a single entry, and any other directory is replaced by
 * the expansion of its children.
 *
 * @param root     entries to expand
 * @param excludes filter that accepts entries which must be skipped
 * @return stream of files and disk folders
 */
protected Stream<File> resolve(Stream<File> root, FileFilter excludes) {
    return root.flatMap(entry -> {
        boolean skipped = entry.isHidden() || excludes.accept(entry);

        if (!skipped && entry.isFile()) {
            return Stream.of(entry);
        }

        if (!skipped && entry.isDirectory()) {
            // disk folders are treated as one unit and are not descended into
            if (isDiskFolder(entry)) {
                return Stream.of(entry);
            }
            return resolve(getChildren(entry).stream(), excludes);
        }

        // skipped, or neither a regular file nor a directory
        return Stream.empty();
    });
}
/**
 * @return read-only list view of the files this detector works on
 */
public List<File> getFiles() {
    List<File> view = asList(files);
    return unmodifiableList(view);
}
// Name hints used by isMovie / isEpisode / isAnime / detectGroup. All are case-insensitive.
// The three folder patterns are passed to anyMatch, which requires the whole folder name to match.

// folder named "Movies"
private static final Pattern MOVIE_PATTERN = Pattern.compile("Movies", CASE_INSENSITIVE);
// folder named "TV?Shows", "TV?Series" or "Season?<digits>", where ? is any single character
private static final Pattern SERIES_PATTERN = Pattern.compile("TV.Shows|TV.Series|Season.[0-9]+", CASE_INSENSITIVE);
// folder named "Anime"
private static final Pattern ANIME_PATTERN = Pattern.compile("Anime", CASE_INSENSITIVE);
// "E" or "EP" followed by 1 to 3 digits; looked up in the file name by detectGroup
private static final Pattern EPISODE_PATTERN = Pattern.compile("E[P]?\\d{1,3}", CASE_INSENSITIVE);
// name starting with "tvs" or "tvp" followed by a space, underscore, dot or dash
private static final Pattern SERIES_EPISODE_PATTERN = Pattern.compile("^tv[sp][ _.-]", CASE_INSENSITIVE);
// name starting with a bracketed tag that ends in "Subs", e.g. "[SomeSubs]"
private static final Pattern ANIME_EPISODE_PATTERN = Pattern.compile("^\\[[^\\]]+Subs\\]", CASE_INSENSITIVE);
/**
 * @param f file to check
 * @return {@code true} if the audio file filter accepts the file and the video file filter does not
 */
public boolean isMusic(File f) {
    if (!AUDIO_FILES.accept(f)) {
        return false;
    }
    return !VIDEO_FILES.accept(f);
}
/**
 * @param f file to check
 * @return {@code true} if one of the parent folders matches {@code MOVIE_PATTERN}, or if
 *         {@code MediaDetection.isMovie(f, true)} reports a movie
 */
public boolean isMovie(File f) {
    if (anyMatch(f.getParentFile(), MOVIE_PATTERN)) {
        return true;
    }
    return MediaDetection.isMovie(f, true);
}
/**
 * Checks whether the file looks like a (non-anime) TV episode. The checks are tried in
 * this order:
 * <ol>
 * <li>the file name passes the non-strict episode check and either a parent folder matches
 * {@code SERIES_PATTERN} or the file name matches {@code SERIES_EPISODE_PATTERN}</li>
 * <li>the full path passes the strict episode check</li>
 * <li>the xattr metadata is an {@code Episode} whose series database is not AniDB</li>
 * </ol>
 *
 * @param f file to check
 * @return {@code true} if any of the checks succeeds
 */
public boolean isEpisode(File f) {
    String name = f.getName();

    if (MediaDetection.isEpisode(name, false)) {
        if (anyMatch(f.getParentFile(), SERIES_PATTERN) || find(name, SERIES_EPISODE_PATTERN)) {
            return true;
        }
    }

    if (MediaDetection.isEpisode(f.getPath(), true)) {
        return true;
    }

    Object metaInfo = xattr.getMetaInfo(f);
    if (!(metaInfo instanceof Episode)) {
        return false;
    }

    Episode episode = (Episode) metaInfo;
    return !AniDB.getIdentifier().equals(episode.getSeriesInfo().getDatabase());
}
/**
 * Checks whether the file looks like an anime episode.
 * <p>
 * A file without a (non-strict) episode number is never anime. Otherwise the file is anime
 * if a parent folder matches {@code ANIME_PATTERN}, or the file name matches
 * {@code ANIME_EPISODE_PATTERN} or {@code EMBEDDED_CHECKSUM}. For video files larger than
 * {@code ONE_MEGABYTE} the media info is consulted next, and its verdict is final unless
 * reading it fails. As a last resort the xattr metadata is checked for an {@code Episode}
 * whose series database is AniDB.
 *
 * @param f file to check
 * @return {@code true} if the file is classified as anime
 */
public boolean isAnime(File f) {
    String name = f.getName();

    if (MediaDetection.parseEpisodeNumber(name, false) == null) {
        return false;
    }

    boolean nameHint = anyMatch(f.getParentFile(), ANIME_PATTERN) || find(name, ANIME_EPISODE_PATTERN) || find(name, EMBEDDED_CHECKSUM);
    if (nameHint) {
        return true;
    }

    if (VIDEO_FILES.accept(f) && f.length() > ONE_MEGABYTE) {
        // check for Japanese audio or characteristic subtitles
        try (MediaInfo mi = new MediaInfo().open(f)) {
            String duration = mi.get(StreamKind.General, 0, "Duration");
            long minutes = Duration.ofMillis(Long.parseLong(duration)).toMinutes();

            // NOTE(review): anything shorter than 60 minutes is accepted regardless of audio language
            // and subtitle codec (the original expression was "minutes < 60 || japanese && ass") — confirm this is intended
            if (minutes < 60) {
                return true;
            }
            return mi.get(StreamKind.General, 0, "AudioLanguageList").contains("Japanese") && mi.get(StreamKind.General, 0, "TextCodecList").contains("ASS");
        } catch (Exception e) {
            // best effort: fall through to the xattr check below
            debug.warning("Failed to read audio language: " + e.getMessage());
        }
    }

    Object metaInfo = xattr.getMetaInfo(f);
    if (!(metaInfo instanceof Episode)) {
        return false;
    }

    Episode episode = (Episode) metaInfo;
    return AniDB.getIdentifier().equals(episode.getSeriesInfo().getDatabase());
}
/**
 * Walks from the given file up through its parents and tests each name against the
 * pattern. The walk stops at the first volume root, which is itself not tested.
 *
 * @param file    starting point, may be {@code null}
 * @param pattern pattern that must match an entire file or folder name
 * @return {@code true} if any visited name matches
 */
public boolean anyMatch(File file, Pattern pattern) {
    // episode characteristics override movie characteristics (e.g. episodes in ~/Movies folder which is considered a volume root)
    File current = file;
    while (current != null && !MediaDetection.isVolumeRoot(current)) {
        if (pattern.matcher(current.getName()).matches()) {
            return true;
        }
        current = current.getParentFile();
    }
    return false;
}
/**
 * Classifies every file and collects the files by the group they were assigned to.
 * Detection runs on a dedicated fixed-size thread pool, one task per distinct file. A file
 * whose detection fails is logged at SEVERE level and left out of the result.
 *
 * @return sorted map from detected group to the sorted set of files in that group
 */
public Map<Group, Set<File>> group() {
    // sort keys and values
    Map<Group, Set<File>> groups = new TreeMap<Group, Set<File>>();

    // can't use parallel stream because default fork/join pool doesn't play well with the security manager
    ExecutorService workerThreadPool = Executors.newFixedThreadPool(getPreferredThreadPoolSize());
    try {
        // the same file may be listed more than once (e.g. a folder and a file inside that folder were both passed in);
        // drop duplicates first because the two-argument toMap collector throws IllegalStateException on duplicate keys
        stream(files).distinct().collect(toMap(f -> f, f -> workerThreadPool.submit(() -> detectGroup(f)))).forEach((file, group) -> {
            try {
                groups.computeIfAbsent(group.get(), k -> new TreeSet<File>()).add(file);
            } catch (Exception e) {
                debug.log(Level.SEVERE, e.getMessage(), e);
            }
        });
    } finally {
        workerThreadPool.shutdownNow();
    }

    return groups;
}
/**
 * Decides which group a single file belongs to. The cheap classification checks run first,
 * in the order music, movie, episode, anime. If none applies, the file is treated as a
 * series when its name matches {@code EPISODE_PATTERN}. Otherwise both movie and series
 * matches are looked up, and {@link Rules} breaks the tie when both lookups return results.
 *
 * @param f file to classify
 * @return the detected group; empty if nothing matched
 */
private Group detectGroup(File f) throws Exception {
    Group group = new Group();

    if (isMusic(f)) {
        return group.music(f);
    }
    if (isMovie(f)) {
        return group.movie(getMovieMatches(f, false));
    }
    if (isEpisode(f)) {
        return group.series(getSeriesMatches(f, false));
    }
    if (isAnime(f)) {
        return group.anime(getSeriesMatches(f, true));
    }

    // ignore movie matches if filename looks like an episode
    if (find(f.getName(), EPISODE_PATTERN)) {
        return group.series(getSeriesMatches(f, false));
    }

    // Movie VS Episode
    List<Movie> movies = getMovieMatches(f, false);
    List<String> series = getSeriesMatches(f, false);

    boolean noMovies = movies.isEmpty();
    boolean noSeries = series.isEmpty();

    if (noMovies && noSeries) {
        return group;
    }
    if (noMovies) {
        // only series matches
        return group.series(series);
    }
    if (noSeries) {
        // only movie matches
        return group.movie(movies);
    }

    // both kinds of matches: let the scoring rules decide
    return new Rules(f, series, movies).apply();
}
/**
 * Detects series names for a single file. If the file alone yields nothing and at least 5
 * of the known video files share its parent folder, detection is retried with all of
 * those video files.
 *
 * @param f     file to look up
 * @param anime passed on to {@code detectSeriesNames}
 * @return detected series names, possibly empty
 */
private List<String> getSeriesMatches(File f, boolean anime) throws Exception {
    List<String> names = detectSeriesNames(singleton(f), anime, locale);
    if (!names.isEmpty()) {
        return names;
    }

    List<File> episodes = getVideoFiles(f.getParentFile());
    if (episodes.size() < 5) {
        return names;
    }

    return detectSeriesNames(episodes, anime, locale);
}
/**
 * Looks up movie matches for the file via {@code MediaDetection.detectMovie}, using
 * {@code TheMovieDB} and this detector's locale.
 *
 * @param file   file to look up
 * @param strict passed on to {@code detectMovie}
 * @return movie matches, possibly empty
 */
private List<Movie> getMovieMatches(File file, boolean strict) throws Exception {
    List<Movie> matches = MediaDetection.detectMovie(file, TheMovieDB, locale, strict);
    return matches;
}
/**
 * Selects the known files that sit directly in the given folder and are accepted by the
 * video file filter.
 *
 * @param parent folder to look in; may be {@code null} for files without a parent
 *               directory, in which case other files without a parent directory are selected
 * @return matching video files in the order of the underlying file array
 */
private List<File> getVideoFiles(File parent) {
    // Objects.equals instead of parent.equals: File.getParentFile() returns null for a path without a parent
    return stream(files).filter(it -> Objects.equals(parent, it.getParentFile())).filter(VIDEO_FILES::accept).collect(toList());
}
// Patterns used by the Rules checks below.

// 4-digit number starting with 19 or 20, with a non-digit character on either side
private static final Pattern YEAR = Pattern.compile("\\D(?:19|20)\\d{2}\\D");
// stand-alone number of 1 to 3 digits (delimited by word boundaries)
private static final Pattern EPISODE_NUMBERS = Pattern.compile("\\b\\d{1,3}\\b");
// at most 3 leading characters, then whitespace, a dash, whitespace and at least one more character
private static final Pattern DASH = Pattern.compile("^.{0,3}\\s[-]\\s.+$", UNICODE_CHARACTER_CLASS);
// two numbers of 1 to 2 digits separated by 1 to 3 non-digits, with a non-digit character on either side
private static final Pattern NUMBER_PAIR = Pattern.compile("\\D\\d{1,2}\\D{1,3}\\d{1,2}\\D");
// text made up of letters, whitespace and punctuation only, i.e. no digits
private static final Pattern NON_NUMBER_NAME = Pattern.compile("^[\\p{L}\\p{Space}\\p{Punct}]+$", UNICODE_CHARACTER_CLASS);
/**
 * Tie-breaker for a file that has both series and movie matches. Only the first series name
 * and the first movie are considered. A list of weighted checks is evaluated in order, and
 * the first time one side leads while the other trails, the trailing candidate is dropped.
 */
private class Rules {

    // holds both candidates initially; apply() clears the losing one
    private final Group group;

    private final File f;
    private final String s;
    private final Movie m;

    // normalized strings: dn = name of the guessed movie folder, fn = file name, sn = series name, mn = movie name;
    // my = movie year as text; asn = result of after(fn, snm), or fn itself if that is empty
    // (presumably the part of the file name that follows the series name — verify against the after helper)
    private final String dn, fn, sn, mn, my, asn;

    // literal patterns for the normalized series name and movie name
    private final Pattern snm, mnm;

    /**
     * @param file   file being classified
     * @param series series name candidates; must not be empty
     * @param movie  movie candidates; must not be empty
     */
    public Rules(File file, List<String> series, List<Movie> movie) throws Exception {
        group = new Group().series(series).movie(movie);

        f = file;
        s = series.get(0);
        m = movie.get(0);

        dn = normalize(getName(guessMovieFolder(f)));
        fn = normalize(getName(f));
        sn = normalize(s);
        mn = normalize(m.getName());
        my = Integer.toString(m.getYear());

        snm = compile(sn, LITERAL);
        mnm = compile(mn, LITERAL);
        asn = after(fn, snm).orElse(fn);
    }

    /**
     * Brings a name into the canonical form used by all comparisons in this class.
     *
     * @param self name to normalize, may be {@code null}
     * @return normalized lower-case name, or the empty string for {@code null}
     */
    private String normalize(String self) {
        if (self == null) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the default locale (e.g. Turkish dotless i)
        return replaceSpace(normalizePunctuation(ascii(self)).toLowerCase(Locale.ROOT), " ").trim();
    }

    private float getSimilarity(String self, String other) {
        return new NameSimilarityMetric().getSimilarity(self, other);
    }

    /**
     * Evaluates the rules in order and keeps a running series score and movie score. Each
     * rule that passes adds its two weights to the scores. Evaluation stops as soon as one
     * score is at least 1 while the other is at most -1.
     *
     * @return the group without the movie if the series side won, without the series if the
     *         movie side won, or with both candidates if no rule was decisive
     */
    public Group apply() throws Exception {
        List<Rule> rules = new ArrayList<Rule>(15);

        // Rule(series weight, movie weight, check)
        rules.add(new Rule(-1, 0, this::equalsMovieName));
        rules.add(new Rule(-1, 0, this::containsMovieYear));
        rules.add(new Rule(-1, 0, this::containsMovieNameYear));
        rules.add(new Rule(5, -1, this::containsEpisodeNumbers));
        rules.add(new Rule(5, -1, this::commonNumberPattern));
        rules.add(new Rule(1, -1, this::episodeWithoutNumbers));
        rules.add(new Rule(1, -1, this::episodeNumbers));
        rules.add(new Rule(-1, 1, this::hasImdbId));
        rules.add(new Rule(-1, 1, this::nonNumberName));
        rules.add(new Rule(-1, 5, this::exactMovieMatch));
        rules.add(new Rule(-1, 1, this::containsMovieName));
        rules.add(new Rule(-1, 1, this::similarNameYear));
        rules.add(new Rule(-1, 1, this::similarNameNoNumbers));
        rules.add(new Rule(-1, 1, this::aliasNameMatch));

        int score_s = 0;
        int score_m = 0;

        for (Rule rule : rules) {
            if (rule.test()) {
                score_s += rule.s;
                score_m += rule.m;

                // series is ahead: drop the movie candidate
                if (score_s >= 1 && score_m <= -1) {
                    return group.movie(null);
                }
                // movie is ahead: drop the series candidate
                if (score_m >= 1 && score_s <= -1) {
                    return group.series(null);
                }
            }
        }

        return group;
    }

    /** The normalized file name equals the normalized movie name. */
    public boolean equalsMovieName() {
        return mn.equals(fn);
    }

    /** The movie year is 1950 or later and one of the last path elements contains it without having an episode number. */
    public boolean containsMovieYear() {
        return m.getYear() >= 1950 && listPathTail(f, 3, true).stream().anyMatch(it -> it.getName().contains(my) && parseEpisodeNumber(it.getName(), false) == null);
    }

    /** Movie and series name are equal, and the text after the year in the folder or file name has no episode number. */
    public boolean containsMovieNameYear() {
        return mn.equals(sn) && Stream.of(dn, fn).anyMatch(it -> {
            return parseEpisodeNumber(after(it, YEAR).orElse(""), false) == null;
        });
    }

    /** The file name contains a strict episode number or a date. */
    public boolean containsEpisodeNumbers() {
        return parseEpisodeNumber(fn, true) != null || parseDate(fn) != null;
    }

    /** At least 10 sibling video files that relate to the series name carry distinct, non-empty sets of stand-alone numbers. */
    public boolean commonNumberPattern() {
        return getChildren(f.getParentFile(), VIDEO_FILES, HUMAN_NAME_ORDER).stream().filter(it -> {
            return find(dn, snm) || find(normalize(it.getName()), snm);
        }).map(it -> {
            // Integer::valueOf replaces the deprecated Integer(String) constructor
            return streamMatches(it.getName(), EPISODE_NUMBERS).map(Integer::valueOf).collect(toSet());
        }).filter(it -> it.size() > 0).distinct().count() >= 10;
    }

    /** The text after the series name matches the DASH pattern and there is no strict movie match. */
    public boolean episodeWithoutNumbers() throws Exception {
        return find(asn, DASH) && getMovieMatches(f, true).isEmpty();
    }

    /** The text after the series name has an episode number or number pair, the series name occurs in folder or file name, and there is no strict movie match. */
    public boolean episodeNumbers() throws Exception {
        String n = stripReleaseInfo(asn, false);
        if (parseEpisodeNumber(n, false) != null || NUMBER_PAIR.matcher(n).find()) {
            return Stream.of(dn, fn).anyMatch(it -> snm.matcher(it).find()) && getMovieMatches(f, true).isEmpty();
        }
        return false;
    }

    /** At least one IMDb id is found in the file name. */
    public boolean hasImdbId() {
        return grepImdbId(fn).size() > 0;
    }

    /** The file name matches NON_NUMBER_NAME (letters, whitespace and punctuation only). */
    public boolean nonNumberName() {
        return find(getName(f), NON_NUMBER_NAME);
    }

    /** There is a strict movie match and the folder or file name contains a year. */
    public boolean exactMovieMatch() throws Exception {
        return getMovieMatches(f, true).size() > 0 && Stream.of(dn, fn).anyMatch(it -> find(it, YEAR));
    }

    /** The file name contains the movie name and no episode number is found after it. */
    public boolean containsMovieName() {
        return fn.contains(mn) && parseEpisodeNumber(after(fn, mnm).orElse(fn), false) == null;
    }

    /** The file name is at least 0.8 similar to the movie name, or the folder or file name contains a number within one year of the movie year. */
    public boolean similarNameYear() {
        return getSimilarity(mn, fn) >= 0.8f || Stream.of(dn, fn).anyMatch(it -> {
            return matchIntegers(it).stream().filter(y -> m.getYear() - 1 <= y && y <= m.getYear() + 1).count() > 0;
        });
    }

    /** Folder or file name contains the movie name with no stand-alone number after it, and is at least 0.2 more similar to the movie name than to the series name. */
    public boolean similarNameNoNumbers() {
        return Stream.of(dn, fn).anyMatch(it -> {
            return find(it, mnm) && !find(after(it, mnm).orElse(it), EPISODE_NUMBERS) && getSimilarity(it, mn) >= 0.2f + getSimilarity(it, sn);
        });
    }

    /** The file name contains one of the movie's effective names (without year). */
    public boolean aliasNameMatch() {
        return m.getEffectiveNamesWithoutYear().stream().map(this::normalize).anyMatch(fn::contains);
    }
}
// A boolean check that may throw a checked exception, so that the Rules methods declared
// with "throws Exception" can be passed to Rule as method references.
@FunctionalInterface
private interface Test {
boolean test() throws Exception;
}
/**
 * A check paired with the two weights that Rules.apply adds to its running scores when the
 * check passes.
 */
private static class Rule implements Test {

    /** Added to the series score when the check passes. */
    public final int s;

    /** Added to the movie score when the check passes. */
    public final int m;

    private final Test check;

    public Rule(int s, int m, Test t) {
        this.check = t;
        this.m = m;
        this.s = s;
    }

    /** Runs the wrapped check and returns its result. */
    @Override
    public boolean test() throws Exception {
        boolean passed = check.test();
        return passed;
    }
}
// Media categories used as the keys of a Group.
public enum Type {
Movie, Series, Anime, Music;
}
/**
 * Detection result for a file: a map from media {@link Type} to the detected value. The
 * movie value is the first movie match, the series and anime values are the normalized
 * first name, and the music value is the parent path of the file. A key may be present
 * with a {@code null} value when a setter was called without a usable match.
 */
public static class Group extends EnumMap<Type, Object> implements Comparable<Group> {

    public Group() {
        super(Type.class);
    }

    public Object getMovie() {
        return get(Type.Movie);
    }

    public Object getSeries() {
        return get(Type.Series);
    }

    public Object getAnime() {
        return get(Type.Anime);
    }

    public Object getMusic() {
        return get(Type.Music);
    }

    /**
     * Stores the first movie of the list, or {@code null} if the list is {@code null} or empty.
     *
     * @return this group
     */
    public Group movie(List<Movie> movies) {
        put(Type.Movie, movies == null || movies.isEmpty() ? null : movies.get(0));
        return this;
    }

    /**
     * Stores the normalized first series name, or {@code null} if the list is {@code null} or empty.
     *
     * @return this group
     */
    public Group series(List<String> names) {
        put(Type.Series, normalizeFirstName(names));
        return this;
    }

    /**
     * Stores the normalized first anime name, or {@code null} if the list is {@code null} or empty.
     *
     * @return this group
     */
    public Group anime(List<String> names) {
        put(Type.Anime, normalizeFirstName(names));
        return this;
    }

    /**
     * Stores the parent path of the file, or {@code null} if the file is {@code null}.
     *
     * @return this group
     */
    public Group music(File f) {
        put(Type.Music, f == null ? null : f.getParent());
        return this;
    }

    /**
     * @return the types that currently have a non-null value
     */
    public Type[] types() {
        return entrySet().stream().filter(it -> it.getValue() != null).map(it -> it.getKey()).toArray(Type[]::new);
    }

    /**
     * Orders groups by number of entries first. Groups of equal size are ordered by the
     * string form of their values, compared case-insensitively in {@link Type} declaration
     * order, with missing or {@code null} values sorting last.
     */
    @Override
    public int compareTo(Group other) {
        if (size() != other.size()) {
            return Integer.compare(size(), other.size());
        }

        return stream(Type.values()).mapToInt(t -> {
            return Comparator.nullsLast(String.CASE_INSENSITIVE_ORDER).compare(Objects.toString(get(t), null), Objects.toString(other.get(t), null));
        }).filter(i -> i != 0).findFirst().orElse(0);
    }

    // Shared by series() and anime(): punctuation-normalized, lower-case, single-spaced, trimmed first name.
    private static String normalizeFirstName(List<String> names) {
        if (names == null || names.isEmpty()) {
            return null;
        }
        // Locale.ROOT keeps group keys independent of the default locale (e.g. Turkish dotless i)
        return replaceSpace(normalizePunctuation(names.get(0)).toLowerCase(Locale.ROOT), " ").trim();
    }
}
}