Experiment with new CachedResource framework

This commit is contained in:
Reinhard Pointner 2016-03-08 12:59:24 +00:00
parent 7e5388e7fa
commit e95078668e
12 changed files with 203 additions and 78 deletions

View File

@ -21,13 +21,15 @@ def dir_data = "${dir_website}/data"
def input = new URL("https://raw.githubusercontent.com/filebot/data/master/${it}")
def output = new File("${dir_data}/${it}")
def set = new TreeSet(String.CASE_INSENSITIVE_ORDER)
def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)
input.getText('UTF-8').split(/\R/)*.trim().findAll{ it.length() > 0 }.each{
set += Pattern.compile(it).pattern()
lines += Pattern.compile(it).pattern()
}
set.join('\n').saveAs(output)
println "${output}\n${output.text}\n"
pack(output, lines)
println lines.join('\n')
lines.join('\n').saveAs(output)
}

View File

@ -24,6 +24,10 @@ public class Cache {
return CacheManager.getInstance().getCache(name, type);
}
/**
 * Create a cached resource for the given key that yields its content as a byte array.
 *
 * @param key      key identifying the resource
 * @param resource maps the key to the URL of the resource
 * @return a new {@code CachedResource2} bound to this cache
 */
// NOTE(review): ONE_DAY is presumably the default expiration time of the returned resource - confirm against the CachedResource2 constructor
public <T> CachedResource2<T, byte[]> bytes(T key, Transform<T, URL> resource) {
return new CachedResource2<T, byte[]>(key, resource, fetchIfModified(), getBytes(), byte[].class::cast, ONE_DAY, this);
}
/**
 * Create a cached resource for the given key that yields its content as a String (UTF-8).
 *
 * @param key      key identifying the resource
 * @param resource maps the key to the URL of the resource
 * @return a new {@code CachedResource2} bound to this cache
 */
// NOTE(review): ONE_DAY is presumably the default expiration time of the returned resource - confirm against the CachedResource2 constructor
public <T> CachedResource2<T, String> text(T key, Transform<T, URL> resource) {
return new CachedResource2<T, String>(key, resource, fetchIfModified(), getText(UTF_8), String.class::cast, ONE_DAY, this);
}
@ -109,6 +113,14 @@ public class Cache {
}
}
/**
 * Flush the underlying cache. This is best-effort: any exception thrown by
 * the flush is logged as a warning and not propagated to the caller.
 */
public void flush() {
try {
cache.flush();
} catch (Exception e) {
// log cache name and exception, then carry on
debug.warning(format("Cache flush: %s => %s", cache.getName(), e));
}
}
@FunctionalInterface
public interface Compute<R> {
R apply(Element element) throws Exception;

View File

@ -11,6 +11,7 @@ import java.nio.channels.FileLock;
import java.nio.charset.Charset;
import java.nio.file.StandardOpenOption;
import java.util.Scanner;
import java.util.logging.Level;
import net.sf.ehcache.CacheException;
import net.sf.ehcache.config.Configuration;
@ -47,6 +48,10 @@ public class CacheManager {
manager.clearAll();
}
/**
 * Shut down the underlying cache manager.
 */
public void shutdown() {
manager.shutdown();
}
private Configuration getConfiguration() throws IOException {
Configuration config = new Configuration();
config.addDiskStore(getDiskStoreConfiguration());
@ -57,7 +62,7 @@ public class CacheManager {
// prepare cache folder for this application instance
File cacheRoot = getApplicationCache().getCanonicalFile();
for (int i = 0; true; i++) {
for (int i = 0; i < 10; i++) {
File cache = new File(cacheRoot, Integer.toString(i));
// make sure cache is readable and writable
@ -102,27 +107,7 @@ public class CacheManager {
}
// make sure to orderly shutdown cache
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
try {
manager.shutdown();
} catch (Exception e) {
// ignore, shutting down anyway
}
try {
lock.release();
} catch (Exception e) {
// ignore, shutting down anyway
}
try {
channel.close();
} catch (Exception e) {
// ignore, shutting down anyway
}
}
});
Runtime.getRuntime().addShutdownHook(new ShutdownHook(this, channel, lock));
// cache for this application instance is successfully set up and locked
return new DiskStoreConfiguration().path(cache.getPath());
@ -131,6 +116,45 @@ public class CacheManager {
// try next lock file
channel.close();
}
// serious error, abort
throw new IOException("Unable to acquire cache lock: " + cacheRoot);
}
/**
 * Shutdown hook that shuts down the cache manager, then releases the cache
 * lock, then closes the lock file channel. Every step is attempted even if
 * an earlier one failed; failures are logged as warnings.
 */
private static class ShutdownHook extends Thread {

    /** A single cleanup action that may fail. */
    private interface Step {
        void run() throws Exception;
    }

    private final CacheManager manager;
    private final FileChannel channel;
    private final FileLock lock;

    public ShutdownHook(CacheManager manager, FileChannel channel, FileLock lock) {
        this.manager = manager;
        this.channel = channel;
        this.lock = lock;
    }

    @Override
    public void run() {
        // cleanup steps and their failure messages, in execution order
        Step[] steps = { () -> manager.shutdown(), () -> lock.release(), () -> channel.close() };
        String[] failures = { "Shutdown hook failed: shutdown", "Shutdown hook failed: release", "Shutdown hook failed: close" };

        for (int i = 0; i < steps.length; i++) {
            try {
                steps[i].run();
            } catch (Exception e) {
                // keep going, the remaining steps must still be attempted
                debug.log(Level.WARNING, failures[i], e);
            }
        }
    }
}
}

View File

@ -18,7 +18,7 @@ import net.filebot.web.WebRequest;
import org.w3c.dom.Document;
public class CachedResource2<K, R> {
public class CachedResource2<K, R> implements Resource<R> {
public static final int DEFAULT_RETRY_LIMIT = 2;
public static final Duration DEFAULT_RETRY_DELAY = Duration.ofSeconds(2);
@ -68,6 +68,7 @@ public class CachedResource2<K, R> {
return this;
}
@Override
public synchronized R get() throws Exception {
Object value = cache.computeIfStale(key, expirationTime, element -> {
URL url = resource.transform(key);
@ -117,6 +118,14 @@ public class CachedResource2<K, R> {
R transform(T object) throws Exception;
}
/**
 * Transform that copies the remaining content of a buffer into a new byte array.
 * Reading advances the position of the given buffer.
 */
public static Transform<ByteBuffer, byte[]> getBytes() {
    return buffer -> {
        byte[] content = new byte[buffer.remaining()];
        // bulk get fills the whole destination array
        buffer.get(content);
        return content;
    };
}
/**
 * Transform that decodes the remaining content of a buffer into a String.
 *
 * @param charset charset used to decode the bytes
 */
public static Transform<ByteBuffer, String> getText(Charset charset) {
    return buffer -> {
        CharSequence decoded = charset.decode(buffer);
        return decoded.toString();
    };
}

View File

@ -0,0 +1,8 @@
package net.filebot;
/**
 * A value that is obtained on demand and whose retrieval may fail.
 *
 * @param <R> type of the value returned by {@link #get()}
 */
@FunctionalInterface
public interface Resource<R> {
/**
 * @return the resource value
 * @throws Exception if the value cannot be obtained
 */
R get() throws Exception;
}

View File

@ -8,7 +8,6 @@ import static net.filebot.media.MediaDetection.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
@ -119,7 +118,7 @@ public final class WebServices {
// index of local thetvdb data dump
private static LocalSearch<SearchResult> localIndex;
public synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
public synchronized LocalSearch<SearchResult> getLocalIndex() throws Exception {
if (localIndex == null) {
// fetch data dump
TheTVDBSearchResult[] data = releaseInfo.getTheTVDBIndex();
@ -181,7 +180,7 @@ public final class WebServices {
// index of local OpenSubtitles data dump
private static LocalSearch<SubtitleSearchResult> localIndex;
public synchronized LocalSearch<SubtitleSearchResult> getLocalIndex() throws IOException {
public synchronized LocalSearch<SubtitleSearchResult> getLocalIndex() throws Exception {
if (localIndex == null) {
// fetch data dump
SubtitleSearchResult[] data = releaseInfo.getOpenSubtitlesIndex();

View File

@ -340,7 +340,7 @@ public class ScriptShellMethods {
return JsonReader.jsonToJava(self);
}
public static File getStructurePathTail(File self) throws IOException {
public static File getStructurePathTail(File self) throws Exception {
return MediaDetection.getStructurePathTail(self);
}

View File

@ -497,7 +497,7 @@ public class MediaBindingBean {
}
@Define("group")
public String getReleaseGroup() throws IOException {
public String getReleaseGroup() throws Exception {
// use inferred media file
File inferredMediaFile = getInferredMediaFile();

View File

@ -81,7 +81,7 @@ public class MediaDetection {
public static FileFilter getClutterFileFilter() {
try {
return releaseInfo.getClutterFileFilter();
} catch (IOException e) {
} catch (Exception e) {
Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.SEVERE, "Unable to access clutter file filter: " + e.getMessage(), e);
}
return ((File f) -> false);
@ -740,7 +740,7 @@ public class MediaDetection {
return ranking;
}
public static List<Movie> sortMoviesBySimilarity(Collection<Movie> options, Collection<String> terms) throws IOException {
public static List<Movie> sortMoviesBySimilarity(Collection<Movie> options, Collection<String> terms) throws Exception {
Collection<String> paragon = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
paragon.addAll(stripReleaseInfo(terms, true));
paragon.addAll(stripReleaseInfo(terms, false));
@ -1023,19 +1023,19 @@ public class MediaDetection {
return releaseInfo.cleanRelease(singleton(name), strict).iterator().next();
} catch (NoSuchElementException e) {
return ""; // default value in case all tokens are stripped away
} catch (IOException e) {
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public static boolean isStructureRoot(File folder) throws IOException {
public static boolean isStructureRoot(File folder) throws Exception {
if (folder == null || folder.getName() == null || folder.getName().isEmpty() || releaseInfo.getVolumeRoots().contains(folder)) {
return true;
}
return releaseInfo.getStructureRootPattern().matcher(folder.getName()).matches();
}
public static File getStructureRoot(File file) throws IOException {
public static File getStructureRoot(File file) throws Exception {
boolean structureRoot = false;
for (File it : listPathTail(file, Integer.MAX_VALUE, true)) {
if (structureRoot || isStructureRoot(it)) {
@ -1048,7 +1048,7 @@ public class MediaDetection {
return null;
}
public static File getStructurePathTail(File file) throws IOException {
public static File getStructurePathTail(File file) throws Exception {
LinkedList<String> relativePath = new LinkedList<String>();
// iterate path in reverse
@ -1154,13 +1154,13 @@ public class MediaDetection {
return file.getParentFile();
}
public static List<String> stripReleaseInfo(Collection<String> names, boolean strict) throws IOException {
public static List<String> stripReleaseInfo(Collection<String> names, boolean strict) throws Exception {
return releaseInfo.cleanRelease(names, strict);
}
private static Pattern blacklistPattern;
public static List<String> stripBlacklistedTerms(Collection<String> names) throws IOException {
public static List<String> stripBlacklistedTerms(Collection<String> names) throws Exception {
if (blacklistPattern == null) {
blacklistPattern = releaseInfo.getBlacklistPattern();
}

View File

@ -1,6 +1,7 @@
package net.filebot.media;
import static java.lang.Integer.*;
import static java.nio.charset.StandardCharsets.*;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static java.util.ResourceBundle.*;
@ -9,14 +10,19 @@ import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.text.Collator;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
@ -26,11 +32,17 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.filebot.Cache;
import net.filebot.CacheType;
import net.filebot.Resource;
import net.filebot.util.ByteBufferInputStream;
import net.filebot.util.FileUtilities.RegexFileFilter;
import net.filebot.web.AnidbSearchResult;
@ -87,9 +99,9 @@ public class ReleaseInfo {
return null;
}
public String getReleaseGroup(String... strings) throws IOException {
public String getReleaseGroup(String... strings) throws Exception {
// check file and folder for release group names
String[] groups = releaseGroupResource.get();
String[] groups = releaseGroup.get();
// try case-sensitive match
String match = matchLast(getReleaseGroupPattern(true), groups, strings);
@ -149,7 +161,7 @@ public class ReleaseInfo {
private final Map<Boolean, Pattern[]> stopwords = new HashMap<Boolean, Pattern[]>(2);
private final Map<Boolean, Pattern[]> blacklist = new HashMap<Boolean, Pattern[]>(2);
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
public List<String> cleanRelease(Collection<String> items, boolean strict) throws Exception {
Pattern[] stopwords;
Pattern[] blacklist;
@ -252,10 +264,10 @@ public class ReleaseInfo {
return volumeRoots;
}
public Pattern getStructureRootPattern() throws IOException {
public Pattern getStructureRootPattern() throws Exception {
if (structureRootFolderPattern == null) {
List<String> folders = new ArrayList<String>();
for (String it : queryBlacklistResource.get()) {
for (String it : queryBlacklist.get()) {
if (it.startsWith("^") && it.endsWith("$")) {
folders.add(it);
}
@ -310,44 +322,44 @@ public class ReleaseInfo {
return compile("(?:\\[([^\\[\\]]+?" + contentFilter + "[^\\[\\]]+?)\\])|(?:\\{([^\\{\\}]+?" + contentFilter + "[^\\{\\}]+?)\\})|(?:\\(([^\\(\\)]+?" + contentFilter + "[^\\(\\)]+?)\\))");
}
public Pattern getReleaseGroupPattern(boolean strict) throws IOException {
public Pattern getReleaseGroupPattern(boolean strict) throws Exception {
// pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})" + or(releaseGroupResource.get()) + "(?!\\p{Alnum}|[^\\p{Alnum}](19|20)\\d{2})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
return compile("(?<!\\p{Alnum})" + or(releaseGroup.get()) + "(?!\\p{Alnum}|[^\\p{Alnum}](19|20)\\d{2})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
public Pattern getBlacklistPattern() throws IOException {
public Pattern getBlacklistPattern() throws Exception {
// pattern matching any blacklisted query term enclosed in separators
return compile("(?<!\\p{Alnum})" + or(queryBlacklistResource.get()) + "(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
return compile("(?<!\\p{Alnum})" + or(queryBlacklist.get()) + "(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
public Pattern getExcludePattern() throws IOException {
public Pattern getExcludePattern() throws Exception {
// pattern matching any term from the exclude blacklist
return compile(or(excludeBlacklistResource.get()), CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
return compile(or(excludeBlacklist.get()), CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
public Pattern getCustomRemovePattern(Collection<String> terms) throws IOException {
return compile("(?<!\\p{Alnum})" + or(quoteAll(terms)) + "(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
public Movie[] getMovieList() throws IOException {
return movieListResource.get();
public Movie[] getMovieList() throws Exception {
return movieIndex.get();
}
public TheTVDBSearchResult[] getTheTVDBIndex() throws IOException {
return tvdbIndexResource.get();
public TheTVDBSearchResult[] getTheTVDBIndex() throws Exception {
return tvdbIndex.get();
}
public AnidbSearchResult[] getAnidbIndex() throws IOException {
return anidbIndexResource.get();
public AnidbSearchResult[] getAnidbIndex() throws Exception {
return anidbIndex.get();
}
public SubtitleSearchResult[] getOpenSubtitlesIndex() throws IOException {
return osdbIndexResource.get();
public SubtitleSearchResult[] getOpenSubtitlesIndex() throws Exception {
return osdbIndex.get();
}
private Map<Pattern, String> seriesDirectMappings;
public Map<Pattern, String> getSeriesDirectMappings() throws IOException {
public Map<Pattern, String> getSeriesDirectMappings() throws Exception {
if (seriesDirectMappings == null) {
Map<Pattern, String> mappings = new LinkedHashMap<Pattern, String>();
for (String line : seriesDirectMappingsResource.get()) {
@ -381,7 +393,7 @@ public class ReleaseInfo {
private static ClutterFileFilter clutterFileFilter;
public FileFilter getClutterFileFilter() throws IOException {
public FileFilter getClutterFileFilter() throws Exception {
if (clutterFileFilter == null) {
clutterFileFilter = new ClutterFileFilter(getExcludePattern(), Long.parseLong(getProperty("number.clutter.maxfilesize"))); // only files smaller than 250 MB may be considered clutter
}
@ -397,18 +409,78 @@ public class ReleaseInfo {
}
// fetch data files online and keep them in the persistent cache until they expire (Cache.ONE_WEEK)
protected final CachedResource<String[]> releaseGroupResource = new PatternResource(getProperty("url.release-groups"));
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getProperty("url.query-blacklist"));
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getProperty("url.exclude-blacklist"));
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getProperty("url.movie-list"));
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getProperty("url.series-mappings"));
protected final CachedResource<TheTVDBSearchResult[]> tvdbIndexResource = new TheTVDBIndexResource(getProperty("url.thetvdb-index"));
protected final CachedResource<AnidbSearchResult[]> anidbIndexResource = new AnidbIndexResource(getProperty("url.anidb-index"));
protected final CachedResource<SubtitleSearchResult[]> osdbIndexResource = new OpenSubtitlesIndexResource(getProperty("url.osdb-index"));
protected String getProperty(String propertyName) {
// allow override via Java System properties
return System.getProperty(propertyName, getBundle(ReleaseInfo.class.getName()).getString(propertyName));
protected final Resource<String[]> releaseGroup = patternResource("url.release-groups");
protected final Resource<String[]> queryBlacklist = patternResource("url.query-blacklist");
protected final Resource<String[]> excludeBlacklist = patternResource("url.exclude-blacklist");
protected final Resource<String[]> seriesDirectMappingsResource = patternResource("url.series-mappings");
protected final Resource<Movie[]> movieIndex = tsvResource("url.movie-list", this::parseMovie, Movie[]::new);
protected final Resource<TheTVDBSearchResult[]> tvdbIndex = tsvResource("url.thetvdb-index", this::parseSeries, TheTVDBSearchResult[]::new);
protected final Resource<AnidbSearchResult[]> anidbIndex = tsvResource("url.anidb-index", this::parseAnime, AnidbSearchResult[]::new);
protected final Resource<SubtitleSearchResult[]> osdbIndex = tsvResource("url.osdb-index", this::parseSubtitle, SubtitleSearchResult[]::new);
/**
 * Build a Movie from the columns of one record: imdbid, tmdbid, year, name,
 * followed by any number of alias names.
 */
private Movie parseMovie(String[] v) {
    int imdbId = parseInt(v[0]);
    int tmdbId = parseInt(v[1]);
    int releaseYear = parseInt(v[2]);
    String title = v[3];
    String[] aliases = copyOfRange(v, 4, v.length);

    // non-positive ids are passed on as -1
    if (imdbId <= 0) {
        imdbId = -1;
    }
    if (tmdbId <= 0) {
        tmdbId = -1;
    }

    return new Movie(title, aliases, releaseYear, imdbId, tmdbId, null);
}
/**
 * Build a TheTVDBSearchResult from the columns of one record: id, name,
 * followed by any number of alias names.
 */
private TheTVDBSearchResult parseSeries(String[] v) {
    // columns are read left to right
    int seriesId = parseInt(v[0]);
    String seriesName = v[1];
    String[] aliases = copyOfRange(v, 2, v.length);

    return new TheTVDBSearchResult(seriesName, aliases, seriesId);
}
/**
 * Build an AnidbSearchResult from the columns of one record: aid, primary
 * title, followed by any number of alias names.
 */
private AnidbSearchResult parseAnime(String[] v) {
    // columns are read left to right
    int animeId = parseInt(v[0]);
    String title = v[1];
    String[] aliases = copyOfRange(v, 2, v.length);

    return new AnidbSearchResult(animeId, title, aliases);
}
/**
 * Build a SubtitleSearchResult from the columns of one record: kind, score,
 * imdbid, year, name, followed by any number of alias names.
 */
private SubtitleSearchResult parseSubtitle(String[] v) {
    // columns are read left to right
    String kindName = v[0];
    int rating = parseInt(v[1]);
    int imdb = parseInt(v[2]);
    int releaseYear = parseInt(v[3]);
    String title = v[4];
    String[] aliases = copyOfRange(v, 5, v.length);

    return new SubtitleSearchResult(title, aliases, releaseYear, imdb, -1, Locale.ENGLISH, SubtitleSearchResult.Kind.forName(kindName), rating);
}
/**
 * Resource yielding all non-empty lines of the data file configured under the given property name.
 *
 * @param name property name of the data file URL
 */
protected Resource<String[]> patternResource(String name) {
    // empty lines are mapped to null so that resource(...) drops them
    return resource(name, Cache.ONE_WEEK, line -> line.isEmpty() ? null : line, String[]::new);
}
/**
 * Resource yielding one parsed object per line of the tab-separated data file
 * configured under the given property name. Empty lines are skipped.
 *
 * @param name      property name of the data file URL
 * @param parse     converts the columns of one line into an object
 * @param generator creates the result array of the requested size
 */
protected <T> Resource<T[]> tsvResource(String name, Function<String[], T> parse, IntFunction<T[]> generator) {
    return resource(name, Cache.ONE_WEEK, s -> {
        // "".split("\t") yields [""] (length 1), so an empty line must be rejected before splitting
        if (s.isEmpty()) {
            return null;
        }

        // a line consisting only of tabs splits into an empty array
        String[] v = s.split("\t");
        return v.length > 0 ? parse.apply(v) : null;
    }, generator);
}
/**
 * Resource that fetches the data file configured under the given property name
 * through the persistent "data" cache, decompresses it and parses it line by line.
 * Lines for which the parser returns null are dropped.
 *
 * @param name           property name of the data file URL
 * @param expirationTime expiration time applied to the cached bytes
 * @param parse          converts one line of text into an object (or null to skip the line)
 * @param generator      creates the result array of the requested size
 */
protected <T> Resource<T[]> resource(String name, Duration expirationTime, Function<String, T> parse, IntFunction<T[]> generator) {
    return () -> {
        byte[] compressed = Cache.getCache("data", CacheType.Persistent)
                .bytes(name, key -> new URL(getProperty(key)))
                .expire(expirationTime)
                .get();

        // all data files are xz compressed
        XZInputStream decompressed = new XZInputStream(new ByteArrayInputStream(compressed));
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(decompressed, UTF_8))) {
            return reader.lines().map(parse).filter(Objects::nonNull).toArray(generator);
        }
    };
}
/**
 * Look up a configuration value. A Java System property of the same name
 * takes precedence over the value from the ReleaseInfo resource bundle.
 *
 * @param name property name
 */
protected String getProperty(String name) {
    // the bundled value is always resolved first and serves as the default
    String bundled = getBundle(ReleaseInfo.class.getName()).getString(name);
    return System.getProperty(name, bundled);
}
protected static class PatternResource extends CachedResource<String[]> {

View File

@ -14,19 +14,19 @@ pattern.subtitle.tags: forced|HI|SDH|Director.?s.Commentary
pattern.video.format: DivX|Xvid|AVC|x264|h264|h.264|HEVC|h265|h.265|3ivx|PGS|MPG|MPEG|MPEG4|MP3|FLAC|AAC|AAC2.0|AAC5.1|AAC.2.0|AAC.5.1|AC3|AC3|AC3.2.0|AC3|AC3.5.1|dd20|dd51|2ch|6ch|DTS|Multi.DTS|DTS.HD|DTS.HD.MA|TrueHD|720p|0720p|1080p|M1080|10bit|10.bit|24FPS|30FPS|60FPS|Hi10|Hi10P|[\\p{Alpha}]{2,3}.(2[.]0|5[.]1)|(19|20)[0-9]+(.)S[0-9]+(?!(.)?E[0-9]+)|(?<=\\d+)v[0-4]
# known release group names
url.release-groups: https://app.filebot.net/data/release-groups.txt
url.release-groups: https://app.filebot.net/data/release-groups.txt.xz
# blacklisted terms that will be ignored
url.query-blacklist: https://app.filebot.net/data/query-blacklist.txt
url.query-blacklist: https://app.filebot.net/data/query-blacklist.txt.xz
# clutter files that will be ignored
url.exclude-blacklist: https://app.filebot.net/data/exclude-blacklist.txt
url.exclude-blacklist: https://app.filebot.net/data/exclude-blacklist.txt.xz
# only files smaller than 250 MB may be considered clutter
number.clutter.maxfilesize: 262144000
# list of patterns directly matching files to series names
url.series-mappings: https://app.filebot.net/data/series-mappings.txt
url.series-mappings: https://app.filebot.net/data/series-mappings.txt.xz
# list of all movies (id, name, year)
url.movie-list: https://app.filebot.net/data/moviedb.txt.xz

View File

@ -9,7 +9,6 @@ import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
import java.io.File;
import java.io.IOException;
import java.time.LocalDate;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
@ -371,7 +370,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
if (names != null) {
try {
return stripReleaseInfo(names, true);
} catch (IOException e) {
} catch (Exception e) {
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
}
}