mirror of https://github.com/moparisthebest/SickRage
synced 2024-11-10 19:35:08 -05:00

Updated our cache code.
Updated rsstorrents to not bother using requests sessions.

This commit is contained in:
parent 5b69783e86
commit 94670f7f95
lib/cachecontrol/__init__.py

@@ -2,12 +2,6 @@
 
 Make it easy to import from cachecontrol without long namespaces.
 """
-
-# patch our requests.models.Response to make them pickleable in older
-# versions of requests.
-
-import cachecontrol.patch_requests
-
-from cachecontrol.wrapper import CacheControl
-from cachecontrol.adapter import CacheControlAdapter
-from cachecontrol.controller import CacheController
+from .wrapper import CacheControl
+from .adapter import CacheControlAdapter
+from .controller import CacheController
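The package keeps re-exporting the same three names, now via relative imports, so caller code is unaffected. A quick sanity sketch (names taken from the diff above):

    from cachecontrol import CacheControl, CacheControlAdapter, CacheController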
lib/cachecontrol/adapter.py

@@ -1,43 +1,65 @@
 from requests.adapters import HTTPAdapter
 
-from cachecontrol.controller import CacheController
-from cachecontrol.cache import DictCache
+from .controller import CacheController
+from .cache import DictCache
 
 
 class CacheControlAdapter(HTTPAdapter):
     invalidating_methods = set(['PUT', 'DELETE'])
 
-    def __init__(self, cache=None, cache_etags=True, *args, **kw):
+    def __init__(self, cache=None, cache_etags=True, controller_class=None,
+                 serializer=None, *args, **kw):
         super(CacheControlAdapter, self).__init__(*args, **kw)
         self.cache = cache or DictCache()
-        self.controller = CacheController(self.cache, cache_etags=cache_etags)
+
+        controller_factory = controller_class or CacheController
+        self.controller = controller_factory(
+            self.cache,
+            cache_etags=cache_etags,
+            serializer=serializer,
+        )
 
     def send(self, request, **kw):
-        """Send a request. Use the request information to see if it
-        exists in the cache.
+        """
+        Send a request. Use the request information to see if it
+        exists in the cache and cache the response if we need to and can.
         """
         if request.method == 'GET':
-            cached_response = self.controller.cached_request(
-                request.url, request.headers
-            )
+            cached_response = self.controller.cached_request(request)
             if cached_response:
-                # Cached responses should not have a raw field since
-                # they *cannot* be created from some stream.
-                cached_response.raw = None
-                return cached_response
+                return self.build_response(request, cached_response, from_cache=True)
 
             # check for etags and add headers if appropriate
-            headers = self.controller.add_headers(request.url)
-            request.headers.update(headers)
+            request.headers.update(self.controller.conditional_headers(request))
 
         resp = super(CacheControlAdapter, self).send(request, **kw)
 
         return resp
 
-    def build_response(self, request, response):
-        """Build a response by making a request or using the cache.
+    def build_response(self, request, response, from_cache=False):
+        """
+        Build a response by making a request or using the cache.
 
         This will end up calling send and returning a potentially
         cached response
         """
+        if not from_cache and request.method == 'GET':
+            if response.status == 304:
+                # We must have sent an ETag request. This could mean
+                # that we've been expired already or that we simply
+                # have an etag. In either case, we want to try and
+                # update the cache if that is the case.
+                cached_response = self.controller.update_cached_response(
+                    request, response
+                )
+
+                if cached_response is not response:
+                    from_cache = True
+
+                response = cached_response
+            else:
+                # try to cache the response
+                self.controller.cache_response(request, response)
+
         resp = super(CacheControlAdapter, self).build_response(
             request, response
         )

@@ -47,28 +69,7 @@ class CacheControlAdapter(HTTPAdapter):
             cache_url = self.controller.cache_url(request.url)
             self.cache.delete(cache_url)
 
-        # Try to store the response if it is a GET
-        elif request.method == 'GET':
-            if response.status == 304:
-                # We must have sent an ETag request. This could mean
-                # that we've been expired already or that we simply
-                # have an etag. In either case, we want to try and
-                # update the cache if that is the case.
-                resp = self.controller.update_cached_response(
-                    request, response
-                )
-                # Fix possible exception when using missing `raw` field in
-                # requests
-                # TODO: remove when requests will be bump to 2.2.2 or 2.3
-                # version
-                resp.raw = None
-            else:
-                # try to cache the response
-                self.controller.cache_response(request, resp)
-
         # Give the request a from_cache attr to let people use it
-        # rather than testing for hasattr.
-        if not hasattr(resp, 'from_cache'):
-            resp.from_cache = False
+        resp.from_cache = from_cache
 
         return resp
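Taken together: send() now asks the controller for a deserialized cached response, attaches conditional headers on a miss, and build_response() folds 304 revalidations back into the cache while stamping from_cache on every response. A minimal usage sketch, assuming the requests package is installed and http://example.com/ is reachable:

    import requests
    from cachecontrol.adapter import CacheControlAdapter

    sess = requests.Session()
    sess.mount('http://', CacheControlAdapter())  # DictCache by default

    r1 = sess.get('http://example.com/')  # network hit; cached if headers allow
    r2 = sess.get('http://example.com/')  # may now be answered from the cache
    print(r1.from_cache, r2.from_cache)   # every response carries from_cache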
lib/cachecontrol/caches/__init__.py

@@ -1,7 +1,7 @@
 from textwrap import dedent
 
 try:
-    from cachecontrol.caches.file_cache import FileCache
+    from .file_cache import FileCache
 except ImportError:
     notice = dedent('''
     NOTE: In order to use the FileCache you must have

@@ -13,6 +13,6 @@ except ImportError:
 
 try:
     import redis
-    from cachecontrol.caches.redis_cache import RedisCache
+    from .redis_cache import RedisCache
 except ImportError:
     pass
lib/cachecontrol/caches/file_cache.py

@@ -1,26 +1,62 @@
+import hashlib
 import os
-import sys
-from hashlib import md5
-
-try:
-    from pickle import load, dump, HIGHEST_PROTOCOL
-except ImportError:
-    from cPickle import load, dump, HIGHEST_PROTOCOL
 
 from lockfile import FileLock
 
 
+def _secure_open_write(filename, fmode):
+    # We only want to write to this file, so open it in write only mode
+    flags = os.O_WRONLY
+
+    # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
+    # will open *new* files.
+    # We specify this because we want to ensure that the mode we pass is the
+    # mode of the file.
+    flags |= os.O_CREAT | os.O_EXCL
+
+    # Do not follow symlinks to prevent someone from making a symlink that
+    # we follow and insecurely open a cache file.
+    if hasattr(os, "O_NOFOLLOW"):
+        flags |= os.O_NOFOLLOW
+
+    # On Windows we'll mark this file as binary
+    if hasattr(os, "O_BINARY"):
+        flags |= os.O_BINARY
+
+    # Before we open our file, we want to delete any existing file that is
+    # there
+    try:
+        os.remove(filename)
+    except (IOError, OSError):
+        # The file must not exist already, so we can just skip ahead to opening
+        pass
+
+    # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
+    # race condition happens between the os.remove and this line, that an
+    # error will be raised. Because we utilize a lockfile this should only
+    # happen if someone is attempting to attack us.
+    fd = os.open(filename, flags, fmode)
+    try:
+        return os.fdopen(fd, "wb")
+    except:
+        # An error occurred wrapping our FD in a file object
+        os.close(fd)
+        raise
+
+
 class FileCache(object):
-    def __init__(self, directory, forever=False):
+    def __init__(self, directory, forever=False, filemode=0o0600,
+                 dirmode=0o0700):
         self.directory = directory
         self.forever = forever
+        self.filemode = filemode
 
         if not os.path.isdir(self.directory):
-            os.mkdir(self.directory)
+            os.makedirs(self.directory, dirmode)
 
     @staticmethod
     def encode(x):
-        return md5(x.encode()).hexdigest()
+        return hashlib.sha224(x.encode()).hexdigest()
 
     def _fn(self, name):
         return os.path.join(self.directory, self.encode(name))

@@ -31,19 +67,13 @@ class FileCache(object):
             return None
 
         with open(name, 'rb') as fh:
-            try:
-                if sys.version < '3':
-                    return load(fh)
-                else:
-                    return load(fh, encoding='latin1')
-            except ValueError:
-                return None
+            return fh.read()
 
     def set(self, key, value):
         name = self._fn(key)
         with FileLock(name) as lock:
-            with open(lock.path, 'wb') as fh:
-                dump(value, fh, HIGHEST_PROTOCOL)
+            with _secure_open_write(lock.path, self.filemode) as fh:
+                fh.write(value)
 
     def delete(self, key):
         name = self._fn(key)
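FileCache keys are now sha224 digests and values are opaque byte strings (serialization moved into the controller), written through _secure_open_write with the new filemode. A direct-use sketch, assuming the lockfile package is installed and that the getter shown above is the cache's get() method; normally the cache is just handed to the adapter:

    from cachecontrol.caches import FileCache

    cache = FileCache('.webcache', filemode=0o600, dirmode=0o700)
    cache.set('http://example.com/', b'serialized-response-bytes')
    print(cache.get('http://example.com/'))  # -> b'serialized-response-bytes'
    # on disk the entry lives at .webcache/<sha224 hex digest of the key>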
lib/cachecontrol/caches/redis_cache.py

@@ -2,11 +2,6 @@ from __future__ import division
 
 from datetime import datetime
 
-try:
-    from cPickle import loads, dumps
-except ImportError:  # Python 3.x
-    from pickle import loads, dumps
-
 
 def total_seconds(td):
     """Python 2.6 compatability"""

@@ -24,19 +19,11 @@ class RedisCache(object):
         self.conn = conn
 
     def get(self, key):
-        val = self.conn.get(key)
-        if val:
-            return loads(val)
-        return None
+        return self.conn.get(key)
 
     def set(self, key, value, expires=None):
         if not expires:
-            self.conn.set(key, dumps(value))
+            self.conn.set(key, value)
         else:
             expires = expires - datetime.now()
             self.conn.setex(key, total_seconds(expires), value)
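RedisCache follows suit: with pickling moved out to the serializer, values go in and come out as raw bytes. A sketch assuming the redis package and a local server:

    import redis
    from cachecontrol.caches import RedisCache

    cache = RedisCache(redis.StrictRedis(host='localhost'))
    cache.set('key', b'value')  # plain SET; expirations go through SETEX
    print(cache.get('key'))     # -> b'value'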
lib/cachecontrol/compat.py

@@ -10,3 +10,17 @@ try:
 except ImportError:
     import email.Utils
     parsedate_tz = email.Utils.parsedate_tz
+
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+# Handle the case where the requests has been patched to not have urllib3
+# bundled as part of it's source.
+try:
+    from requests.packages.urllib3.response import HTTPResponse
+except ImportError:
+    from urllib3.response import HTTPResponse
lib/cachecontrol/controller.py

@@ -6,9 +6,11 @@ import calendar
 import time
 import datetime
 
-from cachecontrol.cache import DictCache
-from cachecontrol.compat import parsedate_tz
-from cachecontrol.session import CacheControlSession
+from requests.structures import CaseInsensitiveDict
+
+from .cache import DictCache
+from .compat import parsedate_tz
+from .serialize import Serializer
 
 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
 
@@ -25,9 +27,10 @@ def parse_uri(uri):
 class CacheController(object):
     """An interface to see if request should cached or not.
     """
-    def __init__(self, cache=None, sess=None, cache_etags=True):
+    def __init__(self, cache=None, cache_etags=True, serializer=None):
         self.cache = cache or DictCache()
         self.cache_etags = cache_etags
+        self.serializer = serializer or Serializer()
 
     def _urlnorm(self, uri):
         """Normalize the URL to create a safe key for the cache"""
@@ -71,62 +74,47 @@ class CacheController(object):
         retval = dict(parts_with_args + parts_wo_args)
         return retval
 
-    def cached_request(self, url, headers):
-        cache_url = self.cache_url(url)
-        cc = self.parse_cache_control(headers)
+    def cached_request(self, request):
+        cache_url = self.cache_url(request.url)
+        cc = self.parse_cache_control(request.headers)
 
         # non-caching states
         no_cache = True if 'no-cache' in cc else False
         if 'max-age' in cc and cc['max-age'] == 0:
             no_cache = True
 
-        # see if it is in the cache anyways
-        in_cache = self.cache.get(cache_url)
-        if no_cache or not in_cache:
+        # Bail out if no-cache was set
+        if no_cache:
             return False
 
         # It is in the cache, so lets see if it is going to be
         # fresh enough
-        resp = self.cache.get(cache_url)
+        resp = self.serializer.loads(request, self.cache.get(cache_url))
 
-        # Check our Vary header to make sure our request headers match
-        # up. We don't delete it from the though, we just don't return
-        # our cached value.
-        #
-        # NOTE: Because httplib2 stores raw content, it denotes
-        # headers that were sent in the original response by
-        # adding -varied-$name. We don't have to do that b/c we
-        # are storing the object which has a reference to the
-        # original request. If that changes, then I'd propose
-        # using the varied headers in the cache key to avoid the
-        # situation all together.
-        if 'vary' in resp.headers:
-            varied_headers = resp.headers['vary'].replace(' ', '').split(',')
-            original_headers = resp.request.headers
-            for header in varied_headers:
-                # If our headers don't match for the headers listed in
-                # the vary header, then don't use the cached response
-                if headers.get(header, None) != original_headers.get(header):
-                    return False
+        # Check to see if we have a cached object
+        if not resp:
+            return False
+
+        headers = CaseInsensitiveDict(resp.headers)
 
         now = time.time()
         date = calendar.timegm(
-            parsedate_tz(resp.headers['date'])
+            parsedate_tz(headers['date'])
         )
         current_age = max(0, now - date)
 
         # TODO: There is an assumption that the result will be a
-        # requests response object. This may not be best since we
+        # urllib3 response object. This may not be best since we
         # could probably avoid instantiating or constructing the
         # response until we know we need it.
-        resp_cc = self.parse_cache_control(resp.headers)
+        resp_cc = self.parse_cache_control(headers)
 
         # determine freshness
         freshness_lifetime = 0
         if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
             freshness_lifetime = int(resp_cc['max-age'])
-        elif 'expires' in resp.headers:
-            expires = parsedate_tz(resp.headers['expires'])
+        elif 'expires' in headers:
+            expires = parsedate_tz(headers['expires'])
             if expires is not None:
                 expire_time = calendar.timegm(expires) - date
                 freshness_lifetime = max(0, expire_time)
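The freshness decision above reduces to comparing the response's age against its allowed lifetime. A worked sketch of that arithmetic with hypothetical header values:

    import calendar
    from email.utils import parsedate_tz

    # pretend the cached response was served 100 seconds ago
    date = calendar.timegm(parsedate_tz('Mon, 10 Feb 2014 16:00:00 GMT'))
    now = date + 100
    current_age = max(0, now - date)         # -> 100
    freshness_lifetime = 900                 # from Cache-Control: max-age=900
    print(freshness_lifetime > current_age)  # True -> cached copy is fresh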
@@ -150,30 +138,32 @@ class CacheController(object):
         fresh = (freshness_lifetime > current_age)
 
         if fresh:
-            # make sure we set the from_cache to true
-            resp.from_cache = True
             return resp
 
         # we're not fresh. If we don't have an Etag, clear it out
-        if 'etag' not in resp.headers:
+        if 'etag' not in headers:
             self.cache.delete(cache_url)
 
-        if 'etag' in resp.headers:
-            headers['If-None-Match'] = resp.headers['ETag']
-
-        if 'last-modified' in resp.headers:
-            headers['If-Modified-Since'] = resp.headers['Last-Modified']
-
         # return the original handler
         return False
 
-    def add_headers(self, url):
-        resp = self.cache.get(url)
-        if resp and 'etag' in resp.headers:
-            return {'If-None-Match': resp.headers['etag']}
-        return {}
+    def conditional_headers(self, request):
+        cache_url = self.cache_url(request.url)
+        resp = self.serializer.loads(request, self.cache.get(cache_url))
+        new_headers = {}
 
-    def cache_response(self, request, resp):
+        if resp:
+            headers = CaseInsensitiveDict(resp.headers)
+
+            if 'etag' in headers:
+                new_headers['If-None-Match'] = headers['ETag']
+
+            if 'last-modified' in headers:
+                new_headers['If-Modified-Since'] = headers['Last-Modified']
+
+        return new_headers
+
+    def cache_response(self, request, response):
         """
         Algorithm for caching requests.
 
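conditional_headers() replaces add_headers() and now produces both validators from the cached copy. Its core, restated with hypothetical header values:

    from requests.structures import CaseInsensitiveDict

    headers = CaseInsensitiveDict({
        'ETag': '"abc123"',
        'Last-Modified': 'Mon, 10 Feb 2014 16:00:00 GMT',
    })
    new_headers = {}
    if 'etag' in headers:
        new_headers['If-None-Match'] = headers['ETag']
    if 'last-modified' in headers:
        new_headers['If-Modified-Since'] = headers['Last-Modified']
    print(new_headers)  # merged into the outgoing request by send()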
@@ -181,7 +171,7 @@ class CacheController(object):
         """
         # From httplib2: Don't cache 206's since we aren't going to
         # handle byte range requests
-        if resp.status_code not in [200, 203]:
+        if response.status not in [200, 203]:
             return
 
         # Cache Session Params
@@ -189,13 +179,15 @@ class CacheController(object):
         cache_urls = getattr(request, 'cache_urls', [])
         cache_max_age = getattr(request, 'cache_max_age', None)
 
+        response_headers = CaseInsensitiveDict(response.headers)
+
         # Check if we are wanting to cache responses from specific urls only
         cache_url = self.cache_url(request.url)
         if len(cache_urls) > 0 and not any(s in cache_url for s in cache_urls):
             return
 
         cc_req = self.parse_cache_control(request.headers)
-        cc = self.parse_cache_control(resp.headers)
+        cc = self.parse_cache_control(response_headers)
 
         # Delete it from the cache if we happen to have it stored there
         no_store = cc.get('no-store') or cc_req.get('no-store')
@@ -203,40 +195,43 @@ class CacheController(object):
             self.cache.delete(cache_url)
 
         # If we've been given an etag, then keep the response
-        if self.cache_etags and 'etag' in resp.headers:
-            self.cache.set(cache_url, resp)
+        if self.cache_etags and 'etag' in response_headers:
+            self.cache.set(cache_url, self.serializer.dumps(request, response))
 
         # If we want to cache sites not setup with cache headers then add the proper headers and keep the response
-        elif cache_auto and not cc and resp.headers:
+        elif cache_auto and not cc and response_headers:
             headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age or 900)}
-            resp.headers.update(headers)
+            response.headers.update(headers)
 
-            if 'expires' not in resp.headers:
-                if getattr(resp.headers, 'expires', None) is None:
-                    expires = datetime.datetime.utcnow() + datetime.timedelta(days=(1))
+            if 'expires' not in response_headers:
+                if getattr(response_headers, 'expires', None) is None:
+                    expires = datetime.datetime.utcnow() + datetime.timedelta(days=1)
                     expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
                     headers = {'Expires': expires}
-                    resp.headers.update(headers)
+                    response.headers.update(headers)
 
-            self.cache.set(cache_url, resp)
+            self.cache.set(cache_url, self.serializer.dumps(request, response))
 
         # Add to the cache if the response headers demand it. If there
         # is no date header then we can't do anything about expiring
         # the cache.
-        elif 'date' in resp.headers:
+        elif 'date' in response_headers:
             # cache when there is a max-age > 0
             if cc and cc.get('max-age'):
                 if int(cc['max-age']) > 0:
-                    if isinstance(cache_max_age, (int)):
+                    if isinstance(cache_max_age, int):
                         cc['max-age'] = int(cache_max_age)
-                        resp.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()])
-                    self.cache.set(cache_url, resp)
+                        response.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()])
+                    self.cache.set(cache_url, self.serializer.dumps(request, response))
 
             # If the request can expire, it means we should cache it
             # in the meantime.
-            elif 'expires' in resp.headers:
-                if getattr(resp.headers, 'expires', None) is not None:
-                    self.cache.set(cache_url, resp)
+            elif 'expires' in response_headers:
+                if response_headers['expires']:
+                    self.cache.set(
+                        cache_url,
+                        self.serializer.dumps(request, response),
+                    )
 
     def update_cached_response(self, request, response):
         """On a 304 we will get a new set of headers that we want to
@@ -247,27 +242,22 @@ class CacheController(object):
         """
         cache_url = self.cache_url(request.url)
 
-        resp = self.cache.get(cache_url)
+        cached_response = self.serializer.loads(request, self.cache.get(cache_url))
 
-        if not resp:
+        if not cached_response:
             # we didn't have a cached response
             return response
 
         # did so lets update our headers
-        resp.headers.update(resp.headers)
+        cached_response.headers.update(response.headers)
 
         # we want a 200 b/c we have content via the cache
-        request.status_code = 200
+        cached_response.status = 200
 
-        # update the request as it has the if-none-match header + any
-        # other headers that the server might have updated (ie Date,
-        # Cache-Control, Expires, etc.)
-        resp.request = request
-
         # update our cache
-        self.cache.set(cache_url, resp)
+        self.cache.set(
+            cache_url,
+            self.serializer.dumps(request, cached_response),
+        )
 
-        # Let everyone know this was from the cache.
-        resp.from_cache = True
-
-        return resp
+        return cached_response
lib/cachecontrol/patch_requests.py

@@ -52,4 +52,5 @@ def make_responses_pickleable():
             raise
             pass
 
+
 make_responses_pickleable()
lib/cachecontrol/serialize.py (new file, 97 lines)

@@ -0,0 +1,97 @@
+import io
+
+from requests.structures import CaseInsensitiveDict
+
+from .compat import HTTPResponse, pickle
+
+
+class Serializer(object):
+    def dumps(self, request, response, body=None):
+        response_headers = CaseInsensitiveDict(response.headers)
+
+        if body is None:
+            # TODO: Figure out a way to handle this which doesn't break
+            # streaming
+            body = response.read(decode_content=False)
+            response._fp = io.BytesIO(body)
+
+        data = {
+            "response": {
+                "body": body,
+                "headers": response.headers,
+                "status": response.status,
+                "version": response.version,
+                "reason": response.reason,
+                "strict": response.strict,
+                "decode_content": response.decode_content,
+            },
+        }
+
+        # Construct our vary headers
+        data["vary"] = {}
+        if "vary" in response_headers:
+            varied_headers = response_headers['vary'].split(',')
+            for header in varied_headers:
+                header = header.strip()
+                data["vary"][header] = request.headers.get(header, None)
+
+        return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)
+
+    def loads(self, request, data):
+        # Short circuit if we've been given an empty set of data
+        if not data:
+            return
+
+        # Determine what version of the serializer the data was serialized
+        # with
+        try:
+            ver, data = data.split(b",", 1)
+        except ValueError:
+            ver = b"cc=0"
+
+        # Make sure that our "ver" is actually a version and isn't a false
+        # positive from a , being in the data stream.
+        if ver[:3] != b"cc=":
+            data = ver + data
+            ver = b"cc=0"
+
+        # Get the version number out of the cc=N
+        ver = ver.split(b"=", 1)[-1].decode("ascii")
+
+        # Dispatch to the actual load method for the given version
+        try:
+            return getattr(self, "_loads_v{0}".format(ver))(request, data)
+        except AttributeError:
+            # This is a version we don't have a loads function for, so we'll
+            # just treat it as a miss and return None
+            return
+
+    def _loads_v0(self, request, data):
+        # The original legacy cache data. This doesn't contain enough
+        # information to construct everything we need, so we'll treat this as
+        # a miss.
+        return
+
+    def _loads_v1(self, request, data):
+        try:
+            cached = pickle.loads(data)
+        except ValueError:
+            return
+
+        # Special case the '*' Vary value as it means we cannot actually
+        # determine if the cached response is suitable for this request.
+        if "*" in cached.get("vary", {}):
+            return
+
+        # Ensure that the Vary headers for the cached response match our
+        # request
+        for header, value in cached.get("vary", {}).items():
+            if request.headers.get(header, None) != value:
+                return
+
+        body = io.BytesIO(cached["response"].pop("body"))
+        return HTTPResponse(
+            body=body,
+            preload_content=False,
+            **cached["response"]
+        )
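The "cc=N," prefix is what lets loads() route old entries to a cache miss instead of unpickling stale formats. A small sketch of the framing (the payload bytes are made up):

    data = b"cc=1," + b"<pickled payload>"

    ver, rest = data.split(b",", 1)
    assert ver[:3] == b"cc="                       # otherwise treated as v0
    print(ver.split(b"=", 1)[-1].decode("ascii"))  # "1" -> dispatches _loads_v1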
lib/cachecontrol/wrapper.py

@@ -1,11 +1,16 @@
-from cachecontrol.adapter import CacheControlAdapter
-from cachecontrol.cache import DictCache
-from cachecontrol.session import CacheControlSession
+from .adapter import CacheControlAdapter
+from .cache import DictCache
+from .session import CacheControlSession
 
-def CacheControl(sess=None, cache=None, cache_etags=True):
+def CacheControl(sess=None, cache=None, cache_etags=True, serializer=None):
     sess = sess or CacheControlSession()
     cache = cache or DictCache()
-    adapter = CacheControlAdapter(cache, cache_etags=cache_etags)
+    adapter = CacheControlAdapter(
+        cache,
+        cache_etags=cache_etags,
+        serializer=serializer,
+    )
     sess.mount('http://', adapter)
+    sess.mount('https://', adapter)
 
     return sess
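CacheControl() now threads the serializer through and mounts the adapter for https:// as well (previously only http:// was cached). A usage sketch with FileCache as an example backend:

    from cachecontrol import CacheControl
    from cachecontrol.caches import FileCache

    sess = CacheControl(cache=FileCache('.webcache'))
    resp = sess.get('https://example.com/')  # https responses are now cached too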
sickbeard/providers/rsstorrent.py

@@ -45,7 +45,6 @@ class TorrentRssProvider(generic.TorrentProvider):
         self.url = re.sub('\/$', '', url)
         self.enabled = True
         self.supportsBacklog = False
-        self.session = None
 
     def configStr(self):
         return self.name + '|' + self.url + '|' + str(int(self.enabled))

@@ -127,12 +126,9 @@ class TorrentRssProvider(generic.TorrentProvider):
 
     def getURL(self, url, post_data=None, headers=None):
 
-        if not self.session:
-            self.session = requests.Session()
-
         try:
             url = urljoin(url, urlparse(url).path.replace('//', '/'))
-            response = self.session.get(url, verify=False)
+            response = requests.get(url, verify=False)
         except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError), e:
             logger.log(u"Error loading " + self.name + " URL: " + ex(e), logger.ERROR)
             return None
sickbeard/providers/thepiratebay.py

@@ -307,8 +307,6 @@ class ThePirateBayProvider(generic.TorrentProvider):
         if self.proxy.isEnabled():
             headers.update({'referer': self.proxy.getProxyURL()})
 
-        result = None
-
         try:
             r = requests.get(url, headers=headers)
         except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError), e: