From 6a1ccef8d905b1eb0b7f88c1446e431dbc3e3fff Mon Sep 17 00:00:00 2001
From: echel0n
Date: Thu, 27 Mar 2014 14:06:03 -0700
Subject: [PATCH] Replaced cache handler with a modified version of our own,
 gives quicker Indexer API responses to our requests

---
 lib/cachecontrol/__init__.py                     |  13 +
 lib/cachecontrol/adapter.py                      |  75 +++++
 lib/cachecontrol/cache.py                        |  36 +++
 lib/cachecontrol/caches/__init__.py              |  18 ++
 lib/cachecontrol/caches/file_cache.py            |  51 ++++
 lib/cachecontrol/caches/redis_cache.py           |  46 ++++
 lib/cachecontrol/compat.py                       |  12 +
 lib/cachecontrol/controller.py                   | 258 ++++++++++++++++++
 lib/cachecontrol/patch_requests.py               |  56 ++++
 lib/cachecontrol/wrapper.py                      |  10 +
 lib/httpcache/__init__.py                        |  14 -
 lib/httpcache/adapter.py                         |  55 ----
 lib/httpcache/cache.py                           | 207 --------------
 lib/httpcache/compat.py                          |  10 -
 lib/httpcache/structures.py                      |  59 ----
 lib/httpcache/utils.py                           |  97 -------
 lib/requests_cache/__init__.py                   |  31 ---
 lib/requests_cache/backends/__init__.py          |  50 ----
 lib/requests_cache/backends/base.py              | 171 ------------
 lib/requests_cache/backends/mongo.py             |  25 --
 lib/requests_cache/backends/redis.py             |  24 --
 lib/requests_cache/backends/sqlite.py            |  30 --
 .../backends/storage/__init__.py                 |   0
 lib/requests_cache/backends/storage/dbdict.py    | 171 ------------
 .../backends/storage/mongodict.py                |  74 -----
 .../backends/storage/redisdict.py                |  68 -----
 lib/requests_cache/compat.py                     | 103 -------
 lib/requests_cache/core.py                       | 227 ---------------
 lib/tvdb_api/tvdb_api.py                         |  17 +-
 lib/tvrage_api/tvrage_api.py                     |  17 +-
 30 files changed, 591 insertions(+), 1434 deletions(-)
 create mode 100644 lib/cachecontrol/__init__.py
 create mode 100644 lib/cachecontrol/adapter.py
 create mode 100644 lib/cachecontrol/cache.py
 create mode 100644 lib/cachecontrol/caches/__init__.py
 create mode 100644 lib/cachecontrol/caches/file_cache.py
 create mode 100644 lib/cachecontrol/caches/redis_cache.py
 create mode 100644 lib/cachecontrol/compat.py
 create mode 100644 lib/cachecontrol/controller.py
 create mode 100644 lib/cachecontrol/patch_requests.py
 create mode 100644 lib/cachecontrol/wrapper.py
 delete mode 100644 lib/httpcache/__init__.py
 delete mode 100644 lib/httpcache/adapter.py
 delete mode 100644 lib/httpcache/cache.py
 delete mode 100644 lib/httpcache/compat.py
 delete mode 100644 lib/httpcache/structures.py
 delete mode 100644 lib/httpcache/utils.py
 delete mode 100644 lib/requests_cache/__init__.py
 delete mode 100644 lib/requests_cache/backends/__init__.py
 delete mode 100644 lib/requests_cache/backends/base.py
 delete mode 100644 lib/requests_cache/backends/mongo.py
 delete mode 100644 lib/requests_cache/backends/redis.py
 delete mode 100644 lib/requests_cache/backends/sqlite.py
 delete mode 100644 lib/requests_cache/backends/storage/__init__.py
 delete mode 100644 lib/requests_cache/backends/storage/dbdict.py
 delete mode 100644 lib/requests_cache/backends/storage/mongodict.py
 delete mode 100644 lib/requests_cache/backends/storage/redisdict.py
 delete mode 100644 lib/requests_cache/compat.py
 delete mode 100644 lib/requests_cache/core.py

diff --git a/lib/cachecontrol/__init__.py b/lib/cachecontrol/__init__.py
new file mode 100644
index 00000000..693e11f1
--- /dev/null
+++ b/lib/cachecontrol/__init__.py
@@ -0,0 +1,13 @@
+"""CacheControl import Interface.
+
+Make it easy to import from cachecontrol without long namespaces.
+"""
+
+# patch our requests.models.Response to make them pickleable in older
+# versions of requests.
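+#
+# Typical usage of this package (a sketch, not part of the module itself;
+# it assumes only the wrapper defined in this patch plus the bundled
+# `requests` library, and uses a placeholder URL):
+#
+#   import requests
+#   from cachecontrol import CacheControl
+#
+#   sess = CacheControl(requests.Session())
+#   resp = sess.get('http://example.com/')  # repeat GETs may hit the cache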
+
+import cachecontrol.patch_requests
+
+from cachecontrol.wrapper import CacheControl
+from cachecontrol.adapter import CacheControlAdapter
+from cachecontrol.controller import CacheController
diff --git a/lib/cachecontrol/adapter.py b/lib/cachecontrol/adapter.py
new file mode 100644
index 00000000..5a7d78d9
--- /dev/null
+++ b/lib/cachecontrol/adapter.py
@@ -0,0 +1,75 @@
+from requests.adapters import HTTPAdapter
+
+from cachecontrol.controller import CacheController
+from cachecontrol.cache import DictCache
+
+
+class CacheControlAdapter(HTTPAdapter):
+    invalidating_methods = set(['PUT', 'DELETE'])
+
+    def __init__(self, cache=None, cache_etags=True, cache_all=False, *args, **kw):
+        super(CacheControlAdapter, self).__init__(*args, **kw)
+        self.cache = cache or DictCache()
+        self.controller = CacheController(self.cache, cache_etags=cache_etags, cache_all=cache_all)
+
+    def send(self, request, **kw):
+        """Send a request. Use the request information to see if it
+        exists in the cache.
+        """
+        if request.method == 'GET':
+            cached_response = self.controller.cached_request(
+                request.url, request.headers
+            )
+            if cached_response:
+                # Cached responses should not have a raw field since
+                # they *cannot* be created from some stream.
+                cached_response.raw = None
+                return cached_response
+
+            # check for etags and add headers if appropriate
+            headers = self.controller.add_headers(request.url)
+            request.headers.update(headers)
+
+        resp = super(CacheControlAdapter, self).send(request, **kw)
+        return resp
+
+    def build_response(self, request, response):
+        """Build a response by making a request or using the cache.
+
+        This will end up calling send and returning a potentially
+        cached response
+        """
+        resp = super(CacheControlAdapter, self).build_response(
+            request, response
+        )
+
+        # See if we should invalidate the cache.
+        if request.method in self.invalidating_methods and resp.ok:
+            cache_url = self.controller.cache_url(request.url)
+            self.cache.delete(cache_url)
+
+        # Try to store the response if it is a GET
+        elif request.method == 'GET':
+            if response.status == 304:
+                # We must have sent an ETag request. This could mean
+                # that we've been expired already or that we simply
+                # have an etag. In either case, we want to try and
+                # update the cache if that is the case.
+                resp = self.controller.update_cached_response(
+                    request, response
+                )
+                # Fix possible exception caused by the missing `raw` field
+                # in requests
+                # TODO: remove when requests is bumped to version 2.2.2
+                # or 2.3
+                resp.raw = None
+            else:
+                # try to cache the response
+                self.controller.cache_response(request, resp)
+
+        # Give the request a from_cache attr to let people use it
+        # rather than testing for hasattr.
+        if not hasattr(resp, 'from_cache'):
+            resp.from_cache = False
+
+        return resp
diff --git a/lib/cachecontrol/cache.py b/lib/cachecontrol/cache.py
new file mode 100644
index 00000000..feb7d3ed
--- /dev/null
+++ b/lib/cachecontrol/cache.py
@@ -0,0 +1,36 @@
+"""
+The cache object API for implementing caches. The default is just a
+dictionary, which in turn means it is not threadsafe for writing.
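+
+Implementations only need to provide get/set/delete. A minimal sketch of
+a custom backend (hypothetical, for illustration only):
+
+    class NullCache(BaseCache):
+        def get(self, key):
+            return None
+
+        def set(self, key, value):
+            pass
+
+        def delete(self, key):
+            pass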
+""" +from threading import Lock + + +class BaseCache(object): + + def get(self, key): + raise NotImplemented() + + def set(self, key, value): + raise NotImplemented() + + def delete(self, key): + raise NotImplemented() + + +class DictCache(BaseCache): + + def __init__(self, init_dict=None): + self.lock = Lock() + self.data = init_dict or {} + + def get(self, key): + return self.data.get(key, None) + + def set(self, key, value): + with self.lock: + self.data.update({key: value}) + + def delete(self, key): + with self.lock: + if key in self.data: + self.data.pop(key) diff --git a/lib/cachecontrol/caches/__init__.py b/lib/cachecontrol/caches/__init__.py new file mode 100644 index 00000000..5e851b03 --- /dev/null +++ b/lib/cachecontrol/caches/__init__.py @@ -0,0 +1,18 @@ +from textwrap import dedent + +try: + from cachecontrol.caches.file_cache import FileCache +except ImportError: + notice = dedent(''' + NOTE: In order to use the FileCache you must have + lockfile installed. You can install it via pip: + pip install lockfile + ''') + print(notice) + + +try: + import redis + from cachecontrol.caches.redis_cache import RedisCache +except ImportError: + pass diff --git a/lib/cachecontrol/caches/file_cache.py b/lib/cachecontrol/caches/file_cache.py new file mode 100644 index 00000000..a75f700e --- /dev/null +++ b/lib/cachecontrol/caches/file_cache.py @@ -0,0 +1,51 @@ +import os +import sys +from hashlib import md5 + +try: + from pickle import load, dump, HIGHEST_PROTOCOL +except ImportError: + from cPickle import load, dump, HIGHEST_PROTOCOL + +from lockfile import FileLock + + +class FileCache(object): + def __init__(self, directory, forever=False): + self.directory = directory + self.forever = forever + + if not os.path.isdir(self.directory): + os.mkdir(self.directory) + + @staticmethod + def encode(x): + return md5(x.encode()).hexdigest() + + def _fn(self, name): + return os.path.join(self.directory, self.encode(name)) + + def get(self, key): + name = self._fn(key) + if not os.path.exists(name): + return None + + with open(name, 'rb') as fh: + try: + if sys.version < '3': + return load(fh) + else: + return load(fh, encoding='latin1') + except ValueError: + return None + + def set(self, key, value): + name = self._fn(key) + with FileLock(name) as lock: + with open(lock.path, 'wb') as fh: + dump(value, fh, HIGHEST_PROTOCOL) + + def delete(self, key): + name = self._fn(key) + if not self.forever: + os.remove(name) \ No newline at end of file diff --git a/lib/cachecontrol/caches/redis_cache.py b/lib/cachecontrol/caches/redis_cache.py new file mode 100644 index 00000000..d3814ebc --- /dev/null +++ b/lib/cachecontrol/caches/redis_cache.py @@ -0,0 +1,46 @@ +from __future__ import division + +from datetime import datetime + +try: + from cPickle import loads, dumps +except ImportError: # Python 3.x + from pickle import loads, dumps + + +def total_seconds(td): + """Python 2.6 compatability""" + if hasattr(td, 'total_seconds'): + return td.total_seconds() + + ms = td.microseconds + secs = (td.seconds + td.days * 24 * 3600) + return (ms + secs * 10**6) / 10**6 + + +class RedisCache(object): + + def __init__(self, conn): + self.conn = conn + + def get(self, key): + val = self.conn.get(key) + if val: + return loads(val) + return None + + def set(self, key, value, expires=None): + if not expires: + self.conn.set(key, dumps(value)) + else: + expires = expires - datetime.now() + self.conn.setex(key, total_seconds(expires), value) + + def delete(self, key): + self.conn.delete(key) + + def clear(self): + 
"""Helper for clearing all the keys in a database. Use with + caution!""" + for key in self.conn.keys(): + self.conn.delete(key) diff --git a/lib/cachecontrol/compat.py b/lib/cachecontrol/compat.py new file mode 100644 index 00000000..1b6e596e --- /dev/null +++ b/lib/cachecontrol/compat.py @@ -0,0 +1,12 @@ +try: + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin + + +try: + import email.utils + parsedate_tz = email.utils.parsedate_tz +except ImportError: + import email.Utils + parsedate_tz = email.Utils.parsedate_tz diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py new file mode 100644 index 00000000..8aa2f985 --- /dev/null +++ b/lib/cachecontrol/controller.py @@ -0,0 +1,258 @@ +""" +The httplib2 algorithms ported for use with requests. +""" +import re +import calendar +import time +import datetime + +from cachecontrol.cache import DictCache +from cachecontrol.compat import parsedate_tz + + +URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") + + +def parse_uri(uri): + """Parses a URI using the regex given in Appendix B of RFC 3986. + + (scheme, authority, path, query, fragment) = parse_uri(uri) + """ + groups = URI.match(uri).groups() + return (groups[1], groups[3], groups[4], groups[6], groups[8]) + + +class CacheController(object): + """An interface to see if request should cached or not. + """ + def __init__(self, cache=None, cache_etags=True, cache_all=False): + self.cache = cache or DictCache() + self.cache_etags = cache_etags + self.cache_all = cache_all + + def _urlnorm(self, uri): + """Normalize the URL to create a safe key for the cache""" + (scheme, authority, path, query, fragment) = parse_uri(uri) + if not scheme or not authority: + raise Exception("Only absolute URIs are allowed. uri = %s" % uri) + authority = authority.lower() + scheme = scheme.lower() + if not path: + path = "/" + + # Could do syntax based normalization of the URI before + # computing the digest. See Section 6.2.2 of Std 66. + request_uri = query and "?".join([path, query]) or path + scheme = scheme.lower() + defrag_uri = scheme + "://" + authority + request_uri + + return defrag_uri + + def cache_url(self, uri): + return self._urlnorm(uri) + + def parse_cache_control(self, headers): + """ + Parse the cache control headers returning a dictionary with values + for the different directives. + """ + retval = {} + + cc_header = 'cache-control' + if 'Cache-Control' in headers: + cc_header = 'Cache-Control' + + if cc_header in headers: + parts = headers[cc_header].split(',') + parts_with_args = [ + tuple([x.strip().lower() for x in part.split("=", 1)]) + for part in parts if -1 != part.find("=")] + parts_wo_args = [(name.strip().lower(), 1) + for name in parts if -1 == name.find("=")] + retval = dict(parts_with_args + parts_wo_args) + return retval + + def cached_request(self, url, headers): + cache_url = self.cache_url(url) + cc = self.parse_cache_control(headers) + + # non-caching states + no_cache = True if 'no-cache' in cc else False + if 'max-age' in cc and cc['max-age'] == 0: + no_cache = True + + # see if it is in the cache anyways + in_cache = self.cache.get(cache_url) + if no_cache or not in_cache: + return False + + # It is in the cache, so lets see if it is going to be + # fresh enough + resp = self.cache.get(cache_url) + + # Check our Vary header to make sure our request headers match + # up. We don't delete it from the though, we just don't return + # our cached value. 
+        #
+        # NOTE: Because httplib2 stores raw content, it denotes
+        #       headers that were sent in the original response by
+        #       adding -varied-$name. We don't have to do that b/c we
+        #       are storing the object which has a reference to the
+        #       original request. If that changes, then I'd propose
+        #       using the varied headers in the cache key to avoid the
+        #       situation all together.
+        if 'vary' in resp.headers:
+            varied_headers = resp.headers['vary'].replace(' ', '').split(',')
+            original_headers = resp.request.headers
+            for header in varied_headers:
+                # If our headers don't match for the headers listed in
+                # the vary header, then don't use the cached response
+                if headers.get(header, None) != original_headers.get(header):
+                    return False
+
+        now = time.time()
+        date = calendar.timegm(
+            parsedate_tz(resp.headers['date'])
+        )
+        current_age = max(0, now - date)
+
+        # TODO: There is an assumption that the result will be a
+        # requests response object. This may not be best since we
+        # could probably avoid instantiating or constructing the
+        # response until we know we need it.
+        resp_cc = self.parse_cache_control(resp.headers)
+
+        # determine freshness
+        freshness_lifetime = 0
+        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
+            freshness_lifetime = int(resp_cc['max-age'])
+        elif 'expires' in resp.headers:
+            expires = parsedate_tz(resp.headers['expires'])
+            if expires is not None:
+                expire_time = calendar.timegm(expires) - date
+                freshness_lifetime = max(0, expire_time)
+
+        # determine if we are setting freshness limit in the req
+        if 'max-age' in cc:
+            try:
+                freshness_lifetime = int(cc['max-age'])
+            except ValueError:
+                freshness_lifetime = 0
+
+        if 'min-fresh' in cc:
+            try:
+                min_fresh = int(cc['min-fresh'])
+            except ValueError:
+                min_fresh = 0
+            # adjust our current age by our min fresh
+            current_age += min_fresh
+
+        # see how fresh we actually are
+        fresh = (freshness_lifetime > current_age)
+
+        if fresh:
+            # make sure we set the from_cache to true
+            resp.from_cache = True
+            return resp
+
+        # we're not fresh. If we don't have an Etag, clear it out
+        if 'etag' not in resp.headers:
+            self.cache.delete(cache_url)
+
+        if 'etag' in resp.headers:
+            headers['If-None-Match'] = resp.headers['ETag']
+
+        if 'last-modified' in resp.headers:
+            headers['If-Modified-Since'] = resp.headers['Last-Modified']
+
+        # return the original handler
+        return False
+
+    def add_headers(self, url):
+        resp = self.cache.get(url)
+        if resp and 'etag' in resp.headers:
+            return {'If-None-Match': resp.headers['etag']}
+        return {}
+
+    def cache_response(self, request, resp):
+        """
+        Algorithm for caching requests.
+
+        This assumes a requests Response object.
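+
+        For example, a 200 response carrying "Cache-Control: max-age=3600"
+        and a Date header is stored, while one carrying "no-store" is
+        evicted from the cache if already present there.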
+ """ + # From httplib2: Don't cache 206's since we aren't going to + # handle byte range requests + if resp.status_code not in [200, 203]: + return + + cc_req = self.parse_cache_control(request.headers) + cc = self.parse_cache_control(resp.headers) + + cache_url = self.cache_url(request.url) + + # Delete it from the cache if we happen to have it stored there + no_store = cc.get('no-store') or cc_req.get('no-store') + if no_store and self.cache.get(cache_url): + self.cache.delete(cache_url) + + # If we've been given an etag, then keep the response + if self.cache_etags and 'etag' in resp.headers: + self.cache.set(cache_url, resp) + + # If we want to cache sites not setup with cache headers then add the proper headers and keep the response + if self.cache_all: + expires = datetime.datetime.utcnow() + datetime.timedelta(days=(25 * 365)) + expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT") + headers = {'Cache-Control': 'public,max-age=%d' % int(3600), + 'Expires': expires} + resp.headers.update(headers) + self.cache.set(cache_url, resp) + + # Add to the cache if the response headers demand it. If there + # is no date header then we can't do anything about expiring + # the cache. + elif 'date' in resp.headers: + # cache when there is a max-age > 0 + if cc and cc.get('max-age'): + if int(cc['max-age']) > 0: + self.cache.set(cache_url, resp) + + # If the request can expire, it means we should cache it + # in the meantime. + elif 'expires' in resp.headers: + if resp.headers['expires']: + self.cache.set(cache_url, resp) + + def update_cached_response(self, request, response): + """On a 304 we will get a new set of headers that we want to + update our cached value with, assuming we have one. + + This should only ever be called when we've sent an ETag and + gotten a 304 as the response. + """ + cache_url = self.cache_url(request.url) + + resp = self.cache.get(cache_url) + + if not resp: + # we didn't have a cached response + return response + + # did so lets update our headers + resp.headers.update(resp.headers) + + # we want a 200 b/c we have content via the cache + request.status_code = 200 + + # update the request as it has the if-none-match header + any + # other headers that the server might have updated (ie Date, + # Cache-Control, Expires, etc.) + resp.request = request + + # update our cache + self.cache.set(cache_url, resp) + + # Let everyone know this was from the cache. 
+        resp.from_cache = True
+
+        return resp
diff --git a/lib/cachecontrol/patch_requests.py b/lib/cachecontrol/patch_requests.py
new file mode 100644
index 00000000..cad60e17
--- /dev/null
+++ b/lib/cachecontrol/patch_requests.py
@@ -0,0 +1,56 @@
+import requests
+
+from requests import models
+from requests.packages.urllib3.response import HTTPResponse
+
+__attrs__ = [
+    '_content',
+    'status_code',
+    'headers',
+    'url',
+    'history',
+    'encoding',
+    'reason',
+    'cookies',
+    'elapsed',
+]
+
+
+def response_getstate(self):
+    # consume everything
+    if not self._content_consumed:
+        self.content
+
+    state = dict(
+        (attr, getattr(self, attr, None))
+        for attr in __attrs__
+    )
+
+    # deal with our raw content b/c we need it for our cookie jar
+    state['raw_original_response'] = self.raw._original_response
+    return state
+
+
+def response_setstate(self, state):
+    for name, value in state.items():
+        if name != 'raw_original_response':
+            setattr(self, name, value)
+
+    setattr(self, 'raw', HTTPResponse())
+    self.raw._original_response = state['raw_original_response']
+
+
+def make_responses_pickleable():
+    try:
+        version_parts = [int(part) for part in requests.__version__.split('.')]
+
+        # must be >= 2.2.x
+        if not version_parts[0] >= 2 or not version_parts[1] >= 2:
+            models.Response.__getstate__ = response_getstate
+            models.Response.__setstate__ = response_setstate
+    except:
+        raise
+        pass
+
+
+make_responses_pickleable()
diff --git a/lib/cachecontrol/wrapper.py b/lib/cachecontrol/wrapper.py
new file mode 100644
index 00000000..be3d932c
--- /dev/null
+++ b/lib/cachecontrol/wrapper.py
@@ -0,0 +1,10 @@
+from cachecontrol.adapter import CacheControlAdapter
+from cachecontrol.cache import DictCache
+
+
+def CacheControl(sess, cache=None, cache_etags=True, cache_all=False):
+    cache = cache or DictCache()
+    adapter = CacheControlAdapter(cache, cache_etags=cache_etags, cache_all=cache_all)
+    sess.mount('http://', adapter)
+
+    return sess
diff --git a/lib/httpcache/__init__.py b/lib/httpcache/__init__.py
deleted file mode 100644
index 0b8a963c..00000000
--- a/lib/httpcache/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-__init__.py
-~~~~~~~~~~~
-
-Defines the public API to the httpcache module.
-"""
-
-__version__ = '0.1.3'
-
-from .cache import HTTPCache
-from .adapter import CachingHTTPAdapter
-
-__all__ = [HTTPCache, CachingHTTPAdapter]
diff --git a/lib/httpcache/adapter.py b/lib/httpcache/adapter.py
deleted file mode 100644
index b1d511bf..00000000
--- a/lib/httpcache/adapter.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
-adapter.py
-~~~~~~~~~~
-
-Contains an implementation of an HTTP adapter for Requests that is aware of the
-cache contained in this module.
-"""
-from requests.adapters import HTTPAdapter
-from .cache import HTTPCache
-
-
-class CachingHTTPAdapter(HTTPAdapter):
-    """
-    A HTTP-caching-aware Transport Adapter for Python Requests. The central
-    portion of the API.
-
-    :param capacity: The maximum capacity of the backing cache.
-    """
-    def __init__(self, capacity=50, **kwargs):
-        super(CachingHTTPAdapter, self).__init__(**kwargs)
-
-        #: The HTTP Cache backing the adapter.
-        self.cache = HTTPCache(capacity=capacity)
-
-    def send(self, request, **kwargs):
-        """
-        Sends a PreparedRequest object, respecting RFC 2616's rules about HTTP
-        caching. Returns a Response object that may have been cached.
-
-        :param request: The Requests :class:`PreparedRequest ` object to send.
- """ - cached_resp = self.cache.retrieve(request) - - if cached_resp is not None: - return cached_resp - else: - return super(CachingHTTPAdapter, self).send(request, **kwargs) - - def build_response(self, request, response): - """ - Builds a Response object from a urllib3 response. May involve returning - a cached Response. - - :param request: The Requests :class:`PreparedRequest ` object sent. - :param response: The urllib3 response. - """ - resp = super(CachingHTTPAdapter, self).build_response(request, - response) - - if resp.status_code == 304: - resp = self.cache.handle_304(resp) - else: - self.cache.store(resp) - - return resp diff --git a/lib/httpcache/cache.py b/lib/httpcache/cache.py deleted file mode 100644 index 892e8f90..00000000 --- a/lib/httpcache/cache.py +++ /dev/null @@ -1,207 +0,0 @@ -# -*- coding: utf-8 -*- -""" -cache.py -~~~~~~~~ - -Contains the primary cache structure used in http-cache. -""" -from .structures import RecentOrderedDict -from .utils import (parse_date_header, build_date_header, - expires_from_cache_control, url_contains_query) -from datetime import datetime - - -# RFC 2616 specifies that we can cache 200 OK, 203 Non Authoritative, -# 206 Partial Content, 300 Multiple Choices, 301 Moved Permanently and -# 410 Gone responses. We don't cache 206s at the moment because we -# don't handle Range and Content-Range headers. -CACHEABLE_RCS = (200, 203, 300, 301, 410) - -# Cacheable verbs. -CACHEABLE_VERBS = ('GET', 'HEAD', 'OPTIONS') - -# Some verbs MUST invalidate the resource in the cache, according to RFC 2616. -# If we send one of these, or any verb we don't recognise, invalidate the -# cache entry for that URL. As it happens, these are also the cacheable -# verbs. That works out well for us. -NON_INVALIDATING_VERBS = CACHEABLE_VERBS - - -class HTTPCache(object): - """ - The HTTP Cache object. Manages caching of responses according to RFC 2616, - adding necessary headers to HTTP request objects, and returning cached - responses based on server responses. - - This object is not expected to be used by most users. It is exposed as part - of the public API for users who feel the need for more control. This API - may change in a minor version increase. Be warned. - - :param capacity: (Optional) The maximum capacity of the HTTP cache. - """ - def __init__(self, capacity=50): - #: The maximum capacity of the HTTP cache. When this many cache entries - #: end up in the cache, the oldest entries are removed. - self.capacity = capacity - - #: The cache backing store. Cache entries are stored here as key-value - #: pairs. The key is the URL used to retrieve the cached response. The - #: value is a python dict, which stores three objects: the response - #: (keyed off of 'response'), the retrieval or creation date (keyed off - #: of 'creation') and the cache expiry date (keyed off of 'expiry'). - #: This last value may be None. - self._cache = RecentOrderedDict() - - def store(self, response): - """ - Takes an HTTP response object and stores it in the cache according to - RFC 2616. Returns a boolean value indicating whether the response was - cached or not. - - :param response: Requests :class:`Response ` object to cache. - """ - # Define an internal utility function. 
- def date_header_or_default(header_name, default, response): - try: - date_header = response.headers[header_name] - except KeyError: - value = default - else: - value = parse_date_header(date_header) - return value - - if response.status_code not in CACHEABLE_RCS: - return False - - if response.request.method not in CACHEABLE_VERBS: - return False - - url = response.url - now = datetime.utcnow() - - # Get the value of the 'Date' header, if it exists. If it doesn't, just - # use now. - creation = date_header_or_default('Date', now, response) - - # Get the value of the 'Cache-Control' header, if it exists. - cc = response.headers.get('Cache-Control', None) - if cc is not None: - expiry = expires_from_cache_control(cc, now) - - # If the above returns None, we are explicitly instructed not to - # cache this. - if expiry is None: - return False - - # Get the value of the 'Expires' header, if it exists, and if we don't - # have anything from the 'Cache-Control' header. - if cc is None: - expiry = date_header_or_default('Expires', None, response) - - # If the expiry date is earlier or the same as the Date header, don't - # cache the response at all. - if expiry is not None and expiry <= creation: - return False - - # If there's a query portion of the url and it's a GET, don't cache - # this unless explicitly instructed to. - if expiry is None and response.request.method == 'GET': - if url_contains_query(url): - return False - - self._cache[url] = {'response': response, - 'creation': creation, - 'expiry': expiry} - - self.__reduce_cache_count() - - return True - - def handle_304(self, response): - """ - Given a 304 response, retrieves the cached entry. This unconditionally - returns the cached entry, so it can be used when the 'intelligent' - behaviour of retrieve() is not desired. - - Returns None if there is no entry in the cache. - - :param response: The 304 response to find the cached entry for. Should be a Requests :class:`Response `. - """ - try: - cached_response = self._cache[response.url]['response'] - except KeyError: - cached_response = None - - return cached_response - - def retrieve(self, request): - """ - Retrieves a cached response if possible. - - If there is a response that can be unconditionally returned (e.g. one - that had a Cache-Control header set), that response is returned. If - there is one that can be conditionally returned (if a 304 is returned), - applies an If-Modified-Since header to the request and returns None. - - :param request: The Requests :class:`PreparedRequest ` object. - """ - return_response = None - url = request.url - - try: - cached_response = self._cache[url] - except KeyError: - return None - - if request.method not in NON_INVALIDATING_VERBS: - del self._cache[url] - return None - - if cached_response['expiry'] is None: - # We have no explicit expiry time, so we weren't instructed to - # cache. Add an 'If-Modified-Since' header. - creation = cached_response['creation'] - header = build_date_header(creation) - request.headers['If-Modified-Since'] = header - else: - # We have an explicit expiry time. If we're earlier than the expiry - # time, return the response. - now = datetime.utcnow() - - if now <= cached_response['expiry']: - return_response = cached_response['response'] - else: - del self._cache[url] - - return return_response - - def __reduce_cache_count(self): - """ - Drops the number of entries in the cache to the capacity of the cache. - - Walks the backing RecentOrderedDict in order from oldest to youngest. 
- Deletes cache entries that are either invalid or being speculatively - cached until the number of cache entries drops to the capacity. If this - leaves the cache above capacity, begins deleting the least-used cache - entries that are still valid until the cache has space. - """ - if len(self._cache) <= self.capacity: - return - - to_delete = len(self._cache) - self.capacity - keys = list(self._cache.keys()) - - for key in keys: - if self._cache[key]['expiry'] is None: - del self._cache[key] - to_delete -= 1 - - if to_delete == 0: - return - - keys = list(self._cache.keys()) - - for i in range(to_delete): - del self._cache[keys[i]] - - return diff --git a/lib/httpcache/compat.py b/lib/httpcache/compat.py deleted file mode 100644 index 384d38e6..00000000 --- a/lib/httpcache/compat.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- -""" -compat.py -~~~~~~~~~ - -Defines cross-platform functions and classes needed to achieve proper -functionality. -""" - -pass diff --git a/lib/httpcache/structures.py b/lib/httpcache/structures.py deleted file mode 100644 index 6ac24a92..00000000 --- a/lib/httpcache/structures.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -structures.py -~~~~~~~~~~~~~ - -Defines structures used by the httpcache module. -""" - -class RecentOrderedDict(dict): - """ - A custom variant of the dictionary that ensures that the object most - recently inserted _or_ retrieved from the dictionary is enumerated first. - """ - def __init__(self): - self._data = {} - self._order = [] - - def __setitem__(self, key, value): - if key in self._data: - self._order.remove(key) - - self._order.append(key) - self._data[key] = value - - def __getitem__(self, key): - value = self._data[key] - self._order.remove(key) - self._order.append(key) - return value - - def __delitem__(self, key): - del self._data[key] - self._order.remove(key) - - def __iter__(self): - return self._order - - def __len__(self): - return len(self._order) - - def __contains__(self, value): - return self._data.__contains__(value) - - def items(self): - return [(key, self._data[key]) for key in self._order] - - def keys(self): - return self._order - - def values(self): - return [self._data[key] for key in self._order] - - def clear(self): - self._data = {} - self._order = [] - - def copy(self): - c = RecentOrderedDict() - c._data = self._data.copy() - c._order = self._order[:] diff --git a/lib/httpcache/utils.py b/lib/httpcache/utils.py deleted file mode 100644 index 0efe9f99..00000000 --- a/lib/httpcache/utils.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- -""" -utils.py -~~~~~~~~ - -Utility functions for use with httpcache. -""" -from datetime import datetime, timedelta - -try: # Python 2 - from urlparse import urlparse -except ImportError: # Python 3 - from urllib.parse import urlparse - -RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT" -RFC_850_DT_STR = "%A, %d-%b-%y %H:%M:%S GMT" - - -def parse_date_header(header): - """ - Given a date header in the form specified by RFC 2616, return a Python - datetime object. - - RFC 2616 specifies three possible formats for date/time headers, and - makes it clear that all dates/times should be in UTC/GMT. That is assumed - by this library, which simply does everything in UTC. This currently does - not parse the C asctime() string, because that's effort. - - This function does _not_ follow Postel's Law. If a format does not strictly - match the defined strings, this function returns None. This is considered - 'safe' behaviour. 
- """ - try: - dt = datetime.strptime(header, RFC_1123_DT_STR) - except ValueError: - try: - dt = datetime.strptime(header, RFC_850_DT_STR) - except ValueError: - dt = None - except TypeError: - dt = None - - return dt - - -def build_date_header(dt): - """ - Given a Python datetime object, build a Date header value according to - RFC 2616. - - RFC 2616 specifies that the RFC 1123 form is to be preferred, so that is - what we use. - """ - return dt.strftime(RFC_1123_DT_STR) - - -def expires_from_cache_control(header, current_time): - """ - Given a Cache-Control header, builds a Python datetime object corresponding - to the expiry time (in UTC). This function should respect all relevant - Cache-Control directives. - - Takes current_time as an argument to ensure that 'max-age=0' generates the - correct behaviour without being special-cased. - - Returns None to indicate that a request must not be cached. - """ - # Cache control header values are made of multiple comma separated fields. - # Splitting them like this is probably a bad idea, but I'm going to roll with - # it for now. We'll come back to it. - fields = header.split(', ') - duration = None - - for field in fields: - # Right now we don't handle no-cache applied to specific fields. To be - # as 'nice' as possible, treat any no-cache as applying to the whole - # request. Bail early, because there's no reason to stick around. - if field.startswith('no-cache') or field == 'no-store': - return None - - if field.startswith('max-age'): - _, duration = field.split('=') - duration = int(duration) - - if duration: - interval = timedelta(seconds=int(duration)) - return current_time + interval - -def url_contains_query(url): - """ - A very stupid function for determining if a URL contains a query string - or not. - """ - if urlparse(url).query: - return True - else: - return False diff --git a/lib/requests_cache/__init__.py b/lib/requests_cache/__init__.py deleted file mode 100644 index 2f83d174..00000000 --- a/lib/requests_cache/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache - ~~~~~~~~~~~~~~ - - Transparent cache for ``requests`` library with persistence and async support - - Just write:: - - import requests_cache - requests_cache.install_cache() - - And requests to resources will be cached for faster repeated access:: - - import requests - for i in range(10): - r = requests.get('http://httpbin.org/delay/5') - # will took approximately 5 seconds instead 50 - - - :copyright: (c) 2012 by Roman Haritonov. - :license: BSD, see LICENSE for more details. 
-""" -__docformat__ = 'restructuredtext' -__version__ = '0.4.4' - -from .core import( - CachedSession, install_cache, uninstall_cache, - disabled, enabled, get_cache, clear, configure -) \ No newline at end of file diff --git a/lib/requests_cache/backends/__init__.py b/lib/requests_cache/backends/__init__.py deleted file mode 100644 index 29da6724..00000000 --- a/lib/requests_cache/backends/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.backends - ~~~~~~~~~~~~~~~~~~~~~~~ - - Classes and functions for cache persistence -""" - - -from .base import BaseCache - -registry = { - 'memory': BaseCache, -} - -try: - # Heroku doesn't allow the SQLite3 module to be installed - from .sqlite import DbCache - registry['sqlite'] = DbCache -except ImportError: - DbCache = None - -try: - from .mongo import MongoCache - registry['mongo'] = registry['mongodb'] = MongoCache -except ImportError: - MongoCache = None - -try: - from .redis import RedisCache - registry['redis'] = RedisCache -except ImportError: - RedisCache = None - - -def create_backend(backend_name, cache_name, options): - if backend_name is None: - backend_name = _get_default_backend_name() - try: - return registry[backend_name](cache_name, **options) - except KeyError: - raise ValueError('Unsupported backend "%s" try one of: %s' % - (backend_name, ', '.join(registry.keys()))) - - -def _get_default_backend_name(): - if 'sqlite' in registry: - return 'sqlite' - return 'memory' \ No newline at end of file diff --git a/lib/requests_cache/backends/base.py b/lib/requests_cache/backends/base.py deleted file mode 100644 index bc13518c..00000000 --- a/lib/requests_cache/backends/base.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.backends.base - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Contains BaseCache class which can be used as in-memory cache backend or - extended to support persistence. -""" -from datetime import datetime -import hashlib -from copy import copy - -import requests - -from ..compat import is_py2 - - -class BaseCache(object): - """ Base class for cache implementations, can be used as in-memory cache. - - To extend it you can provide dictionary-like objects for - :attr:`keys_map` and :attr:`responses` or override public methods. - """ - def __init__(self, *args, **kwargs): - #: `key` -> `key_in_responses` mapping - self.keys_map = {} - #: `key_in_cache` -> `response` mapping - self.responses = {} - - def save_response(self, key, response): - """ Save response to cache - - :param key: key for this response - :param response: response to save - - .. note:: Response is reduced before saving (with :meth:`reduce_response`) - to make it picklable - """ - self.responses[key] = self.reduce_response(response), datetime.utcnow() - - def add_key_mapping(self, new_key, key_to_response): - """ - Adds mapping of `new_key` to `key_to_response` to make it possible to - associate many keys with single response - - :param new_key: new key (e.g. url from redirect) - :param key_to_response: key which can be found in :attr:`responses` - :return: - """ - self.keys_map[new_key] = key_to_response - - def get_response_and_time(self, key, default=(None, None)): - """ Retrieves response and timestamp for `key` if it's stored in cache, - otherwise returns `default` - - :param key: key of resource - :param default: return this if `key` not found in cache - :returns: tuple (response, datetime) - - .. 
note:: Response is restored after unpickling with :meth:`restore_response` - """ - try: - if key not in self.responses: - key = self.keys_map[key] - response, timestamp = self.responses[key] - except KeyError: - return default - return self.restore_response(response), timestamp - - def delete(self, key): - """ Delete `key` from cache. Also deletes all responses from response history - """ - try: - if key in self.responses: - response, _ = self.responses[key] - del self.responses[key] - else: - response, _ = self.responses[self.keys_map[key]] - del self.keys_map[key] - for r in response.history: - del self.keys_map[self.create_key(r.request)] - except KeyError: - pass - - def delete_url(self, url): - """ Delete response associated with `url` from cache. - Also deletes all responses from response history. Works only for GET requests - """ - self.delete(self._url_to_key(url)) - - def clear(self): - """ Clear cache - """ - self.responses.clear() - self.keys_map.clear() - - def has_key(self, key): - """ Returns `True` if cache has `key`, `False` otherwise - """ - return key in self.responses or key in self.keys_map - - def has_url(self, url): - """ Returns `True` if cache has `url`, `False` otherwise. - Works only for GET request urls - """ - return self.has_key(self._url_to_key(url)) - - def _url_to_key(self, url): - from requests import Request - return self.create_key(Request('GET', url).prepare()) - - _response_attrs = ['_content', 'url', 'status_code', 'cookies', - 'headers', 'encoding', 'request', 'reason', 'raw'] - - _raw_response_attrs = ['_original_response', 'decode_content', 'headers', - 'reason', 'status', 'strict', 'version'] - - def reduce_response(self, response): - """ Reduce response object to make it compatible with ``pickle`` - """ - result = _Store() - # prefetch - response.content - for field in self._response_attrs: - setattr(result, field, self._picklable_field(response, field)) - result.history = tuple(self.reduce_response(r) for r in response.history) - return result - - def _picklable_field(self, response, name): - value = getattr(response, name) - if name == 'request': - value = copy(value) - value.hooks = [] - elif name == 'raw': - result = _Store() - for field in self._raw_response_attrs: - setattr(result, field, getattr(value, field, None)) - value = result - return value - - def restore_response(self, response): - """ Restore response object after unpickling - """ - result = requests.Response() - for field in self._response_attrs: - setattr(result, field, getattr(response, field, None)) - result.history = tuple(self.restore_response(r) for r in response.history) - return result - - def create_key(self, request): - key = hashlib.sha256() - key.update(_to_bytes(request.method.upper())) - key.update(_to_bytes(request.url)) - if request.body: - key.update(_to_bytes(request.body)) - return key.hexdigest() - - def __str__(self): - return 'keys: %s\nresponses: %s' % (self.keys_map, self.responses) - - -# used for saving response attributes -class _Store(object): - pass - - -def _to_bytes(s, encoding='utf-8'): - if is_py2 or isinstance(s, bytes): - return s - return bytes(s, encoding) \ No newline at end of file diff --git a/lib/requests_cache/backends/mongo.py b/lib/requests_cache/backends/mongo.py deleted file mode 100644 index 084d2ac4..00000000 --- a/lib/requests_cache/backends/mongo.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.backends.mongo - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - ``mongo`` cache backend -""" 
-from .base import BaseCache -from .storage.mongodict import MongoDict, MongoPickleDict - - -class MongoCache(BaseCache): - """ ``mongo`` cache backend. - """ - def __init__(self, db_name='requests-cache', **options): - """ - :param db_name: database name (default: ``'requests-cache'``) - :param connection: (optional) ``pymongo.Connection`` - """ - super(MongoCache, self).__init__() - self.responses = MongoPickleDict(db_name, 'responses', - options.get('connection')) - self.keys_map = MongoDict(db_name, 'urls', self.responses.connection) - diff --git a/lib/requests_cache/backends/redis.py b/lib/requests_cache/backends/redis.py deleted file mode 100644 index c800c4f5..00000000 --- a/lib/requests_cache/backends/redis.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.backends.redis - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - ``redis`` cache backend -""" -from .base import BaseCache -from .storage.redisdict import RedisDict - - -class RedisCache(BaseCache): - """ ``redis`` cache backend. - """ - def __init__(self, namespace='requests-cache', **options): - """ - :param namespace: redis namespace (default: ``'requests-cache'``) - :param connection: (optional) ``redis.StrictRedis`` - """ - super(RedisCache, self).__init__() - self.responses = RedisDict(namespace, 'responses', - options.get('connection')) - self.keys_map = RedisDict(namespace, 'urls', self.responses.connection) diff --git a/lib/requests_cache/backends/sqlite.py b/lib/requests_cache/backends/sqlite.py deleted file mode 100644 index 5e56b22c..00000000 --- a/lib/requests_cache/backends/sqlite.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.backends.sqlite - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - ``sqlite3`` cache backend -""" -from .base import BaseCache -from .storage.dbdict import DbDict, DbPickleDict - - -class DbCache(BaseCache): - """ sqlite cache backend. - - Reading is fast, saving is a bit slower. It can store big amount of data - with low memory usage. - """ - def __init__(self, location='cache', - fast_save=False, extension='.sqlite', **options): - """ - :param location: database filename prefix (default: ``'cache'``) - :param fast_save: Speedup cache saving up to 50 times but with possibility of data loss. 
- See :ref:`backends.DbDict ` for more info - :param extension: extension for filename (default: ``'.sqlite'``) - """ - super(DbCache, self).__init__() - self.responses = DbPickleDict(location + extension, 'responses', fast_save=fast_save) - self.keys_map = DbDict(location + extension, 'urls') - diff --git a/lib/requests_cache/backends/storage/__init__.py b/lib/requests_cache/backends/storage/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/requests_cache/backends/storage/dbdict.py b/lib/requests_cache/backends/storage/dbdict.py deleted file mode 100644 index ef0f516c..00000000 --- a/lib/requests_cache/backends/storage/dbdict.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -""" - requests_cache.backends.dbdict - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Dictionary-like objects for saving large data sets to `sqlite` database -""" -from collections import MutableMapping -import sqlite3 as sqlite -from contextlib import contextmanager -try: - import threading -except ImportError: - import dummy_threading as threading -try: - import cPickle as pickle -except ImportError: - import pickle - -from requests_cache.compat import bytes - - - -class DbDict(MutableMapping): - """ DbDict - a dictionary-like object for saving large datasets to `sqlite` database - - It's possible to create multiply DbDict instances, which will be stored as separate - tables in one database:: - - d1 = DbDict('test', 'table1') - d2 = DbDict('test', 'table2') - d3 = DbDict('test', 'table3') - - all data will be stored in ``test.sqlite`` database into - correspondent tables: ``table1``, ``table2`` and ``table3`` - """ - - def __init__(self, filename, table_name='data', fast_save=False, **options): - """ - :param filename: filename for database (without extension) - :param table_name: table name - :param fast_save: If it's True, then sqlite will be configured with - `"PRAGMA synchronous = 0;" `_ - to speedup cache saving, but be careful, it's dangerous. - Tests showed that insertion order of records can be wrong with this option. - """ - self.filename = filename - self.table_name = table_name - self.fast_save = fast_save - - #: Transactions can be commited if this property is set to `True` - self.can_commit = True - - - self._bulk_commit = False - self._pending_connection = None - self._lock = threading.RLock() - with self.connection() as con: - con.execute("create table if not exists `%s` (key PRIMARY KEY, value)" % self.table_name) - - - @contextmanager - def connection(self, commit_on_success=False): - with self._lock: - if self._bulk_commit: - if self._pending_connection is None: - self._pending_connection = sqlite.connect(self.filename) - con = self._pending_connection - else: - con = sqlite.connect(self.filename) - try: - if self.fast_save: - con.execute("PRAGMA synchronous = 0;") - yield con - if commit_on_success and self.can_commit: - con.commit() - finally: - if not self._bulk_commit: - con.close() - - def commit(self, force=False): - """ - Commits pending transaction if :attr:`can_commit` or `force` is `True` - - :param force: force commit, ignore :attr:`can_commit` - """ - if force or self.can_commit: - if self._pending_connection is not None: - self._pending_connection.commit() - - @contextmanager - def bulk_commit(self): - """ - Context manager used to speedup insertion of big number of records - :: - - >>> d1 = DbDict('test') - >>> with d1.bulk_commit(): - ... for i in range(1000): - ... 
d1[i] = i * 2 - - """ - self._bulk_commit = True - self.can_commit = False - try: - yield - self.commit(True) - finally: - self._bulk_commit = False - self.can_commit = True - self._pending_connection.close() - self._pending_connection = None - - def __getitem__(self, key): - with self.connection() as con: - row = con.execute("select value from `%s` where key=?" % - self.table_name, (key,)).fetchone() - if not row: - raise KeyError - return row[0] - - def __setitem__(self, key, item): - with self.connection(True) as con: - if con.execute("select key from `%s` where key=?" % - self.table_name, (key,)).fetchone(): - con.execute("update `%s` set value=? where key=?" % - self.table_name, (item, key)) - else: - con.execute("insert into `%s` (key,value) values (?,?)" % - self.table_name, (key, item)) - - def __delitem__(self, key): - with self.connection(True) as con: - if con.execute("select key from `%s` where key=?" % - self.table_name, (key,)).fetchone(): - con.execute("delete from `%s` where key=?" % - self.table_name, (key,)) - else: - raise KeyError - - def __iter__(self): - with self.connection() as con: - for row in con.execute("select key from `%s`" % - self.table_name): - yield row[0] - - def __len__(self): - with self.connection() as con: - return con.execute("select count(key) from `%s`" % - self.table_name).fetchone()[0] - - def clear(self): - with self.connection(True) as con: - con.execute("drop table `%s`" % self.table_name) - con.execute("create table `%s` (key PRIMARY KEY, value)" % - self.table_name) - - def __str__(self): - return str(dict(self.items())) - - -class DbPickleDict(DbDict): - """ Same as :class:`DbDict`, but pickles values before saving - """ - def __setitem__(self, key, item): - super(DbPickleDict, self).__setitem__(key, - sqlite.Binary(pickle.dumps(item))) - - def __getitem__(self, key): - return pickle.loads(bytes(super(DbPickleDict, self).__getitem__(key))) diff --git a/lib/requests_cache/backends/storage/mongodict.py b/lib/requests_cache/backends/storage/mongodict.py deleted file mode 100644 index 172d9a0f..00000000 --- a/lib/requests_cache/backends/storage/mongodict.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.backends.mongodict - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Dictionary-like objects for saving large data sets to ``mongodb`` database -""" -from collections import MutableMapping -try: - import cPickle as pickle -except ImportError: - import pickle - -from pymongo import Connection - - -class MongoDict(MutableMapping): - """ MongoDict - a dictionary-like interface for ``mongo`` database - """ - def __init__(self, db_name, - collection_name='mongo_dict_data', connection=None): - """ - :param db_name: database name (be careful with production databases) - :param collection_name: collection name (default: mongo_dict_data) - :param connection: ``pymongo.Connection`` instance. 
If it's ``None`` - (default) new connection with default options will - be created - """ - if connection is not None: - self.connection = connection - else: - self.connection = Connection() - self.db = self.connection[db_name] - self.collection = self.db[collection_name] - - def __getitem__(self, key): - result = self.collection.find_one({'_id': key}) - if result is None: - raise KeyError - return result['data'] - - def __setitem__(self, key, item): - self.collection.save({'_id': key, 'data': item}) - - def __delitem__(self, key): - spec = {'_id': key} - if self.collection.find_one(spec, fields=['_id']): - self.collection.remove(spec) - else: - raise KeyError - - def __len__(self): - return self.collection.count() - - def __iter__(self): - for d in self.collection.find(fields=['_id']): - yield d['_id'] - - def clear(self): - self.collection.drop() - - def __str__(self): - return str(dict(self.items())) - - -class MongoPickleDict(MongoDict): - """ Same as :class:`MongoDict`, but pickles values before saving - """ - def __setitem__(self, key, item): - super(MongoPickleDict, self).__setitem__(key, pickle.dumps(item)) - - def __getitem__(self, key): - return pickle.loads(bytes(super(MongoPickleDict, self).__getitem__(key))) diff --git a/lib/requests_cache/backends/storage/redisdict.py b/lib/requests_cache/backends/storage/redisdict.py deleted file mode 100644 index dfe72a45..00000000 --- a/lib/requests_cache/backends/storage/redisdict.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.backends.redisdict - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Dictionary-like objects for saving large data sets to ``redis`` key-store -""" -from collections import MutableMapping -try: - import cPickle as pickle -except ImportError: - import pickle -from redis import StrictRedis as Redis - - -class RedisDict(MutableMapping): - """ RedisDict - a dictionary-like interface for ``redis`` key-stores - """ - def __init__(self, namespace, collection_name='redis_dict_data', - connection=None): - """ - The actual key name on the redis server will be - ``namespace``:``collection_name`` - - In order to deal with how redis stores data/keys, - everything, i.e. keys and data, must be pickled. - - :param namespace: namespace to use - :param collection_name: name of the hash map stored in redis - (default: redis_dict_data) - :param connection: ``redis.StrictRedis`` instance. 
- If it's ``None`` (default), a new connection with - default options will be created - - """ - if connection is not None: - self.connection = connection - else: - self.connection = Redis() - self._self_key = ':'.join([namespace, collection_name]) - - def __getitem__(self, key): - result = self.connection.hget(self._self_key, pickle.dumps(key)) - if result is None: - raise KeyError - return pickle.loads(bytes(result)) - - def __setitem__(self, key, item): - self.connection.hset(self._self_key, pickle.dumps(key), - pickle.dumps(item)) - - def __delitem__(self, key): - if not self.connection.hdel(self._self_key, pickle.dumps(key)): - raise KeyError - - def __len__(self): - return self.connection.hlen(self._self_key) - - def __iter__(self): - for v in self.connection.hkeys(self._self_key): - yield pickle.loads(bytes(v)) - - def clear(self): - self.connection.delete(self._self_key) - - def __str__(self): - return str(dict(self.items())) diff --git a/lib/requests_cache/compat.py b/lib/requests_cache/compat.py deleted file mode 100644 index daaf0bbd..00000000 --- a/lib/requests_cache/compat.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- -# taken from requests library: https://github.com/kennethreitz/requests -""" -pythoncompat -""" - - -import sys - -# ------- -# Pythons -# ------- - -# Syntax sugar. -_ver = sys.version_info - -#: Python 2.x? -is_py2 = (_ver[0] == 2) - -#: Python 3.x? -is_py3 = (_ver[0] == 3) - -#: Python 3.0.x -is_py30 = (is_py3 and _ver[1] == 0) - -#: Python 3.1.x -is_py31 = (is_py3 and _ver[1] == 1) - -#: Python 3.2.x -is_py32 = (is_py3 and _ver[1] == 2) - -#: Python 3.3.x -is_py33 = (is_py3 and _ver[1] == 3) - -#: Python 3.4.x -is_py34 = (is_py3 and _ver[1] == 4) - -#: Python 2.7.x -is_py27 = (is_py2 and _ver[1] == 7) - -#: Python 2.6.x -is_py26 = (is_py2 and _ver[1] == 6) - -#: Python 2.5.x -is_py25 = (is_py2 and _ver[1] == 5) - -#: Python 2.4.x -is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice. - - -# --------- -# Platforms -# --------- - - -# Syntax sugar. -_ver = sys.version.lower() - -is_pypy = ('pypy' in _ver) -is_jython = ('jython' in _ver) -is_ironpython = ('iron' in _ver) - -# Assume CPython, if nothing else. -is_cpython = not any((is_pypy, is_jython, is_ironpython)) - -# Windows-based system. -is_windows = 'win32' in str(sys.platform).lower() - -# Standard Linux 2+ system. -is_linux = ('linux' in str(sys.platform).lower()) -is_osx = ('darwin' in str(sys.platform).lower()) -is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess. -is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. 
- - -# --------- -# Specifics -# --------- - - -if is_py2: - from urllib import quote, unquote, urlencode - from urlparse import urlparse, urlunparse, urljoin, urlsplit - from urllib2 import parse_http_list - import cookielib - from StringIO import StringIO - bytes = str - str = unicode - basestring = basestring - - - -elif is_py3: - from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote - from urllib.request import parse_http_list - from http import cookiejar as cookielib - from http.cookies import SimpleCookie - from io import StringIO - - str = str - bytes = bytes - basestring = (str,bytes) - diff --git a/lib/requests_cache/core.py b/lib/requests_cache/core.py deleted file mode 100644 index 934636c0..00000000 --- a/lib/requests_cache/core.py +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - requests_cache.core - ~~~~~~~~~~~~~~~~~~~ - - Core functions for configuring cache and monkey patching ``requests`` -""" -from contextlib import contextmanager -from datetime import datetime, timedelta - -import requests -from requests import Session as OriginalSession -from requests.hooks import dispatch_hook - -from requests_cache import backends -from requests_cache.compat import str, basestring - -try: - ver = tuple(map(int, requests.__version__.split("."))) -except ValueError: - pass -else: - # We don't need to dispatch hook in Requests <= 1.1.0 - if ver < (1, 2, 0): - dispatch_hook = lambda key, hooks, hook_data, *a, **kw: hook_data - del ver - - -class CachedSession(OriginalSession): - """ Requests ``Sessions`` with caching support. - """ - - def __init__(self, cache_name='cache', backend=None, expire_after=None, - allowable_codes=(200,), allowable_methods=('GET',), - **backend_options): - """ - :param cache_name: for ``sqlite`` backend: cache file will start with this prefix, - e.g ``cache.sqlite`` - - for ``mongodb``: it's used as database name - - for ``redis``: it's used as the namespace. This means all keys - are prefixed with ``'cache_name:'`` - :param backend: cache backend name e.g ``'sqlite'``, ``'mongodb'``, ``'redis'``, ``'memory'``. - (see :ref:`persistence`). Or instance of backend implementation. - Default value is ``None``, which means use ``'sqlite'`` if available, - otherwise fallback to ``'memory'``. - :param expire_after: number of seconds after cache will be expired - or `None` (default) to ignore expiration - :type expire_after: float - :param allowable_codes: limit caching only for response with this codes (default: 200) - :type allowable_codes: tuple - :param allowable_methods: cache only requests of this methods (default: 'GET') - :type allowable_methods: tuple - :kwarg backend_options: options for chosen backend. 
-
-    def send(self, request, **kwargs):
-        if (self._is_cache_disabled
-                or request.method not in self._cache_allowable_methods):
-            response = super(CachedSession, self).send(request, **kwargs)
-            response.from_cache = False
-            return response
-
-        cache_key = self.cache.create_key(request)
-
-        def send_request_and_cache_response():
-            response = super(CachedSession, self).send(request, **kwargs)
-            if response.status_code in self._cache_allowable_codes:
-                self.cache.save_response(cache_key, response)
-            response.from_cache = False
-            return response
-
-        response, timestamp = self.cache.get_response_and_time(cache_key)
-        if response is None:
-            return send_request_and_cache_response()
-
-        if self._cache_expire_after is not None:
-            difference = datetime.utcnow() - timestamp
-            if difference > timedelta(seconds=self._cache_expire_after):
-                self.cache.delete(cache_key)
-                return send_request_and_cache_response()
-        # dispatch hook here, because we've removed it before pickling
-        response.from_cache = True
-        response = dispatch_hook('response', request.hooks, response, **kwargs)
-        return response
-
-    def request(self, method, url, params=None, data=None, headers=None,
-                cookies=None, files=None, auth=None, timeout=None,
-                allow_redirects=True, proxies=None, hooks=None, stream=None,
-                verify=None, cert=None):
-        response = super(CachedSession, self).request(method, url, params, data,
-                                                      headers, cookies, files,
-                                                      auth, timeout,
-                                                      allow_redirects, proxies,
-                                                      hooks, stream, verify, cert)
-        if self._is_cache_disabled:
-            return response
-
-        main_key = self.cache.create_key(response.request)
-        for r in response.history:
-            self.cache.add_key_mapping(
-                self.cache.create_key(r.request), main_key
-            )
-        return response
-
-    @contextmanager
-    def cache_disabled(self):
-        """
-        Context manager for temporary disabling cache
-        ::
-
-            >>> s = CachedSession()
-            >>> with s.cache_disabled():
-            ...     s.get('http://httpbin.org/ip')
-        """
-        self._is_cache_disabled = True
-        try:
-            yield
-        finally:
-            self._is_cache_disabled = False
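Note the pattern change that makes the context manager above unnecessary: requests_cache monkey-patched requests.Session globally (see _patch_session_factory below), so bypassing the cache required cache_disabled(). CacheControl instead wraps one explicit Session, so the tvdb/tvrage hunks further down simply pick the wrapped or the plain interface per call. A sketch, with a hypothetical cache directory::

    # Sketch of the per-call choice adopted by the hunks below.
    # '/tmp/api-cache' is a hypothetical cache directory.
    from lib import requests
    from lib import cachecontrol
    from lib.cachecontrol import caches

    sess = cachecontrol.CacheControl(requests.Session(),
                                     cache=caches.FileCache('/tmp/api-cache'))

    cached = sess.get('http://httpbin.org/get')     # served through the cache
    fresh = requests.get('http://httpbin.org/get')  # plain request, never cached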
-
-
-def install_cache(cache_name='cache', backend=None, expire_after=None,
-                  allowable_codes=(200,), allowable_methods=('GET',),
-                  session_factory=CachedSession, **backend_options):
-    """
-    Installs cache for all ``Requests`` requests by monkey-patching ``Session``
-
-    Parameters are the same as in :class:`CachedSession`. Additional parameters:
-
-    :param session_factory: Session factory. It should inherit :class:`CachedSession` (default)
-    """
-    if backend:
-        backend = backends.create_backend(backend, cache_name, backend_options)
-    _patch_session_factory(
-        lambda : session_factory(cache_name=cache_name,
-                                 backend=backend,
-                                 expire_after=expire_after,
-                                 allowable_codes=allowable_codes,
-                                 allowable_methods=allowable_methods,
-                                 **backend_options)
-    )
-
-
-# backward compatibility
-configure = install_cache
-
-
-def uninstall_cache():
-    """ Restores ``requests.Session`` and disables cache
-    """
-    _patch_session_factory(OriginalSession)
-
-
-@contextmanager
-def disabled():
-    """
-    Context manager for temporary disabling globally installed cache
-
-    .. warning:: not thread-safe
-
-    ::
-
-        >>> with requests_cache.disabled():
-        ...     requests.get('http://httpbin.org/ip')
-        ...     requests.get('http://httpbin.org/get')
-
-    """
-    previous = requests.Session
-    uninstall_cache()
-    try:
-        yield
-    finally:
-        _patch_session_factory(previous)
-
-
-@contextmanager
-def enabled(*args, **kwargs):
-    """
-    Context manager for temporary installing global cache.
-
-    Accepts same arguments as :func:`install_cache`
-
-    .. warning:: not thread-safe
-
-    ::
-
-        >>> with requests_cache.enabled('cache_db'):
-        ...     requests.get('http://httpbin.org/get')
-
-    """
-    install_cache(*args, **kwargs)
-    try:
-        yield
-    finally:
-        uninstall_cache()
-
-
-def get_cache():
-    """ Returns internal cache object from globally installed ``CachedSession``
-    """
-    return requests.Session().cache
-
-
-def clear():
-    """ Clears globally installed cache
-    """
-    get_cache().clear()
-
-
-def _patch_session_factory(session_factory=CachedSession):
-    requests.Session = requests.sessions.Session = session_factory
diff --git a/lib/tvdb_api/tvdb_api.py b/lib/tvdb_api/tvdb_api.py
index ddcecec3..85301294 100644
--- a/lib/tvdb_api/tvdb_api.py
+++ b/lib/tvdb_api/tvdb_api.py
@@ -37,15 +37,13 @@ except ImportError:
     gzip = None
 
 from lib import requests
-from lib import requests_cache
+from lib import cachecontrol
+from lib.cachecontrol import caches
 
 from tvdb_ui import BaseUI, ConsoleUI
 from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound,
     tvdb_seasonnotfound, tvdb_episodenotfound, tvdb_attributenotfound)
 
-# Cached Session Handler
-from lib.httpcache import CachingHTTPAdapter
-
 def log():
     return logging.getLogger("tvdb_api")
 
@@ -429,12 +427,14 @@ class Tvdb:
 
         if cache is True:
             self.config['cache_enabled'] = True
-            requests_cache.install_cache(self._getTempDir())
+            self.sess = cachecontrol.CacheControl(requests.Session(),
+                                                  cache=caches.FileCache(self._getTempDir()), cache_all=True)
         elif cache is False:
             self.config['cache_enabled'] = False
         elif isinstance(cache, basestring):
             self.config['cache_enabled'] = True
-            requests_cache.install_cache(cache)
+            self.sess = cachecontrol.CacheControl(requests.Session(),
+                                                  cache=caches.FileCache(cache), cache_all=True)
         else:
             raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))
 
@@ -537,10 +537,9 @@
 
             # get response from TVDB
             if self.config['cache_enabled']:
-                resp = requests.get(url, params=params)
+                resp = self.sess.get(url, params=sorted(params.items()) if params else None)
             else:
-                with requests_cache.disabled():
-                    resp = requests.get(url, params=params)
+                resp = requests.get(url, params=params)
 
         except requests.HTTPError, e:
             raise tvdb_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
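The tvdb_api hunks above are the heart of the migration: a per-instance CacheControl session backed by a FileCache replaces the global monkey-patch. Because the new adapter ensures every response carries a from_cache attribute (see adapter.py earlier in this patch), callers can verify cache hits directly. A sketch, using a hypothetical directory in place of Tvdb()._getTempDir()::

    # Sketch: observing cache hits on the new session; '/tmp/tvdb-cache'
    # is a hypothetical stand-in for the instance's temp directory.
    from lib import requests
    from lib import cachecontrol
    from lib.cachecontrol import caches

    sess = cachecontrol.CacheControl(requests.Session(),
                                     cache=caches.FileCache('/tmp/tvdb-cache'),
                                     cache_all=True)

    first = sess.get('http://httpbin.org/get')
    second = sess.get('http://httpbin.org/get')
    # expect: False True, assuming the second request is a cache hit
    print first.from_cache, second.from_cache

Note also that the cached branch above sorts the query parameters (sorted(params.items())) so that logically identical requests produce identical cache keys regardless of dict iteration order; the uncached branch passes the dict through untouched. The tvrage hunk below leaves params unsorted, which is harmless but leaves its cache keys sensitive to dict ordering.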
diff --git a/lib/tvrage_api/tvrage_api.py b/lib/tvrage_api/tvrage_api.py
index 5a99a2ed..35d9ceb9 100644
--- a/lib/tvrage_api/tvrage_api.py
+++ b/lib/tvrage_api/tvrage_api.py
@@ -32,15 +32,13 @@ except ImportError:
 
 from lib.dateutil.parser import parse
 from lib import requests
-from lib import requests_cache
+from lib import cachecontrol
+from lib.cachecontrol import caches
 
 from tvrage_ui import BaseUI
 from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
     tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
 
-# Cached Session Handler
-from lib.httpcache import CachingHTTPAdapter
-
 def log():
     return logging.getLogger("tvrage_api")
 
@@ -272,12 +270,14 @@ class TVRage:
 
         if cache is True:
             self.config['cache_enabled'] = True
-            requests_cache.install_cache(self._getTempDir())
+            self.sess = cachecontrol.CacheControl(requests.Session(),
+                                                  cache=caches.FileCache(self._getTempDir()), cache_all=True)
         elif cache is False:
             self.config['cache_enabled'] = False
        elif isinstance(cache, basestring):
             self.config['cache_enabled'] = True
-            requests_cache.install_cache(cache)
+            self.sess = cachecontrol.CacheControl(requests.Session(),
+                                                  cache=caches.FileCache(cache), cache_all=True)
         else:
             raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))
 
@@ -370,10 +370,9 @@
 
             # get response from TVRage
             if self.config['cache_enabled']:
-                resp = requests.get(url, params=params)
+                resp = self.sess.get(url, params=params)
             else:
-                with requests_cache.disabled():
-                    resp = requests.get(url, params=params)
+                resp = requests.get(url, params=params)
 
         except requests.HTTPError, e:
             raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
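A closing note on cache_all=True, which both call sites pass. Stock CacheControl only stores responses whose headers permit it, and the indexer APIs are sparing with caching headers, which is presumably why this fork adds the flag (the commit message promises quicker indexer responses). The forked controller.py is not shown in this excerpt, but the intent is roughly the following hypothetical sketch, not the fork's real code::

    # Hypothetical: what a cache_all flag could look like inside
    # CacheController.cache_response(). The fork's actual logic may differ;
    # parse_cache_control is assumed from upstream CacheControl.
    def cache_response(self, request, response):
        cc = self.parse_cache_control(response.headers)
        if not self.cache_all and not cc:
            # stock behaviour: no caching headers, nothing to store
            return
        # with cache_all, persist the response even when the server
        # sent no Cache-Control / Expires headers
        self.cache.set(self.cache_url(request.url), response)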