diff --git a/lib/cachecontrol/__init__.py b/lib/cachecontrol/__init__.py new file mode 100644 index 00000000..693e11f1 --- /dev/null +++ b/lib/cachecontrol/__init__.py @@ -0,0 +1,13 @@ +"""CacheControl import Interface. + +Make it easy to import from cachecontrol without long namespaces. +""" + +# patch our requests.models.Response to make them pickleable in older +# versions of requests. + +import cachecontrol.patch_requests + +from cachecontrol.wrapper import CacheControl +from cachecontrol.adapter import CacheControlAdapter +from cachecontrol.controller import CacheController diff --git a/lib/cachecontrol/adapter.py b/lib/cachecontrol/adapter.py new file mode 100644 index 00000000..27f58fc7 --- /dev/null +++ b/lib/cachecontrol/adapter.py @@ -0,0 +1,70 @@ +from requests.adapters import HTTPAdapter + +from cachecontrol.controller import CacheController +from cachecontrol.cache import DictCache + + +class CacheControlAdapter(HTTPAdapter): + invalidating_methods = set(['PUT', 'DELETE']) + + def __init__(self, cache=None, cache_etags=True, *args, **kw): + super(CacheControlAdapter, self).__init__(*args, **kw) + self.cache = cache or DictCache() + self.controller = CacheController(self.cache, cache_etags=cache_etags) + + def send(self, request, **kw): + """Send a request. Use the request information to see if it + exists in the cache. + """ + if request.method == 'GET': + cached_response = self.controller.cached_request( + request.url, request.headers + ) + if cached_response: + # Cached responses should not have a raw field since + # they *cannot* be created from some stream. + cached_response.raw = None + return cached_response + + # check for etags and add headers if appropriate + headers = self.controller.add_headers(request.url) + request.headers.update(headers) + + resp = super(CacheControlAdapter, self).send(request, **kw) + return resp + + def build_response(self, request, response): + """Build a response by making a request or using the cache. + + This will end up calling send and returning a potentially + cached response + """ + resp = super(CacheControlAdapter, self).build_response( + request, response + ) + + # See if we should invalidate the cache. + if request.method in self.invalidating_methods and resp.ok: + cache_url = self.controller.cache_url(request.url) + self.cache.delete(cache_url) + + # Try to store the response if it is a GET + elif request.method == 'GET': + if response.status == 304: + # We must have sent an ETag request. This could mean + # that we've been expired already or that we simply + # have an etag. In either case, we want to try and + # update the cache if that is the case. + resp = self.controller.update_cached_response( + request, response + ) + else: + # try to cache the response + self.controller.cache_response(request, resp) + + # Give the request a from_cache attr to let people use it + # rather than testing for hasattr. + if not hasattr(resp, 'from_cache'): + resp.from_cache = False + + return resp diff --git a/lib/cachecontrol/cache.py b/lib/cachecontrol/cache.py new file mode 100644 index 00000000..feb7d3ed --- /dev/null +++ b/lib/cachecontrol/cache.py @@ -0,0 +1,36 @@ +""" +The cache object API for implementing caches. The default is just a +dictionary, which in turns means it is not threadsafe for writing. 
+""" +from threading import Lock + + +class BaseCache(object): + + def get(self, key): + raise NotImplemented() + + def set(self, key, value): + raise NotImplemented() + + def delete(self, key): + raise NotImplemented() + + +class DictCache(BaseCache): + + def __init__(self, init_dict=None): + self.lock = Lock() + self.data = init_dict or {} + + def get(self, key): + return self.data.get(key, None) + + def set(self, key, value): + with self.lock: + self.data.update({key: value}) + + def delete(self, key): + with self.lock: + if key in self.data: + self.data.pop(key) diff --git a/lib/cachecontrol/caches/__init__.py b/lib/cachecontrol/caches/__init__.py new file mode 100644 index 00000000..5e851b03 --- /dev/null +++ b/lib/cachecontrol/caches/__init__.py @@ -0,0 +1,18 @@ +from textwrap import dedent + +try: + from cachecontrol.caches.file_cache import FileCache +except ImportError: + notice = dedent(''' + NOTE: In order to use the FileCache you must have + lockfile installed. You can install it via pip: + pip install lockfile + ''') + print(notice) + + +try: + import redis + from cachecontrol.caches.redis_cache import RedisCache +except ImportError: + pass diff --git a/lib/cachecontrol/caches/file_cache.py b/lib/cachecontrol/caches/file_cache.py new file mode 100644 index 00000000..3a7d1a4c --- /dev/null +++ b/lib/cachecontrol/caches/file_cache.py @@ -0,0 +1,43 @@ +import os +import codecs + +from hashlib import md5 + +try: + from pickle import load, dump +except ImportError: + from cPickle import load, dump + +from lib.lockfile import FileLock + + +class FileCache(object): + + def __init__(self, directory, forever=False): + self.directory = directory + self.forever = forever + + if not os.path.isdir(self.directory): + os.mkdir(self.directory) + + def encode(self, x): + return md5(x.encode()).hexdigest() + + def _fn(self, name): + return os.path.join(self.directory, self.encode(name)) + + def get(self, key): + name = self._fn(key) + if os.path.exists(name): + return load(codecs.open(name, 'rb')) + + def set(self, key, value): + name = self._fn(key) + lock = FileLock(name) + with lock: + with codecs.open(lock.path, 'w+b') as fh: + dump(value, fh) + + def delete(self, key): + if not self.forever: + os.remove(self._fn(key)) diff --git a/lib/cachecontrol/caches/redis_cache.py b/lib/cachecontrol/caches/redis_cache.py new file mode 100644 index 00000000..d3814ebc --- /dev/null +++ b/lib/cachecontrol/caches/redis_cache.py @@ -0,0 +1,46 @@ +from __future__ import division + +from datetime import datetime + +try: + from cPickle import loads, dumps +except ImportError: # Python 3.x + from pickle import loads, dumps + + +def total_seconds(td): + """Python 2.6 compatability""" + if hasattr(td, 'total_seconds'): + return td.total_seconds() + + ms = td.microseconds + secs = (td.seconds + td.days * 24 * 3600) + return (ms + secs * 10**6) / 10**6 + + +class RedisCache(object): + + def __init__(self, conn): + self.conn = conn + + def get(self, key): + val = self.conn.get(key) + if val: + return loads(val) + return None + + def set(self, key, value, expires=None): + if not expires: + self.conn.set(key, dumps(value)) + else: + expires = expires - datetime.now() + self.conn.setex(key, total_seconds(expires), value) + + def delete(self, key): + self.conn.delete(key) + + def clear(self): + """Helper for clearing all the keys in a database. 
Use with + caution!""" + for key in self.conn.keys(): + self.conn.delete(key) diff --git a/lib/cachecontrol/compat.py b/lib/cachecontrol/compat.py new file mode 100644 index 00000000..1b6e596e --- /dev/null +++ b/lib/cachecontrol/compat.py @@ -0,0 +1,12 @@ +try: + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin + + +try: + import email.utils + parsedate_tz = email.utils.parsedate_tz +except ImportError: + import email.Utils + parsedate_tz = email.Utils.parsedate_tz diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py new file mode 100644 index 00000000..67dd840e --- /dev/null +++ b/lib/cachecontrol/controller.py @@ -0,0 +1,247 @@ +""" +The httplib2 algorithms ported for use with requests. +""" +import re +import calendar +import time + +from cachecontrol.cache import DictCache +from cachecontrol.compat import parsedate_tz + + +URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") + + +def parse_uri(uri): + """Parses a URI using the regex given in Appendix B of RFC 3986. + + (scheme, authority, path, query, fragment) = parse_uri(uri) + """ + groups = URI.match(uri).groups() + return (groups[1], groups[3], groups[4], groups[6], groups[8]) + + +class CacheController(object): + """An interface to see if request should cached or not. + """ + def __init__(self, cache=None, cache_etags=True): + self.cache = cache or DictCache() + self.cache_etags = cache_etags + + def _urlnorm(self, uri): + """Normalize the URL to create a safe key for the cache""" + (scheme, authority, path, query, fragment) = parse_uri(uri) + if not scheme or not authority: + raise Exception("Only absolute URIs are allowed. uri = %s" % uri) + authority = authority.lower() + scheme = scheme.lower() + if not path: + path = "/" + + # Could do syntax based normalization of the URI before + # computing the digest. See Section 6.2.2 of Std 66. + request_uri = query and "?".join([path, query]) or path + scheme = scheme.lower() + defrag_uri = scheme + "://" + authority + request_uri + + return defrag_uri + + def cache_url(self, uri): + return self._urlnorm(uri) + + def parse_cache_control(self, headers): + """ + Parse the cache control headers returning a dictionary with values + for the different directives. + """ + retval = {} + + cc_header = 'cache-control' + if 'Cache-Control' in headers: + cc_header = 'Cache-Control' + + if cc_header in headers: + parts = headers[cc_header].split(',') + parts_with_args = [ + tuple([x.strip().lower() for x in part.split("=", 1)]) + for part in parts if -1 != part.find("=")] + parts_wo_args = [(name.strip().lower(), 1) + for name in parts if -1 == name.find("=")] + retval = dict(parts_with_args + parts_wo_args) + return retval + + def cached_request(self, url, headers): + cache_url = self.cache_url(url) + cc = self.parse_cache_control(headers) + + # non-caching states + no_cache = True if 'no-cache' in cc else False + if 'max-age' in cc and cc['max-age'] == 0: + no_cache = True + + # see if it is in the cache anyways + in_cache = self.cache.get(cache_url) + if no_cache or not in_cache: + return False + + # It is in the cache, so lets see if it is going to be + # fresh enough + resp = self.cache.get(cache_url) + + # Check our Vary header to make sure our request headers match + # up. We don't delete it from the though, we just don't return + # our cached value. + # + # NOTE: Because httplib2 stores raw content, it denotes + # headers that were sent in the original response by + # adding -varied-$name. 
We don't have to do that b/c we + # are storing the object which has a reference to the + # original request. If that changes, then I'd propose + # using the varied headers in the cache key to avoid the + # situation all together. + if 'vary' in resp.headers: + varied_headers = resp.headers['vary'].replace(' ', '').split(',') + original_headers = resp.request.headers + for header in varied_headers: + # If our headers don't match for the headers listed in + # the vary header, then don't use the cached response + if headers.get(header, None) != original_headers.get(header): + return False + + now = time.time() + date = calendar.timegm( + parsedate_tz(resp.headers['date']) + ) + current_age = max(0, now - date) + + # TODO: There is an assumption that the result will be a + # requests response object. This may not be best since we + # could probably avoid instantiating or constructing the + # response until we know we need it. + resp_cc = self.parse_cache_control(resp.headers) + + # determine freshness + freshness_lifetime = 0 + if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): + freshness_lifetime = int(resp_cc['max-age']) + elif 'expires' in resp.headers: + expires = parsedate_tz(resp.headers['expires']) + if expires is not None: + expire_time = calendar.timegm(expires) - date + freshness_lifetime = max(0, expire_time) + + # determine if we are setting freshness limit in the req + if 'max-age' in cc: + try: + freshness_lifetime = int(cc['max-age']) + except ValueError: + freshness_lifetime = 0 + + if 'min-fresh' in cc: + try: + min_fresh = int(cc['min-fresh']) + except ValueError: + min_fresh = 0 + # adjust our current age by our min fresh + current_age += min_fresh + + # see how fresh we actually are + fresh = (freshness_lifetime > current_age) + + if fresh: + # make sure we set the from_cache to true + resp.from_cache = True + return resp + + # we're not fresh. If we don't have an Etag, clear it out + if 'etag' not in resp.headers: + self.cache.delete(cache_url) + + if 'etag' in resp.headers: + headers['If-None-Match'] = resp.headers['ETag'] + + if 'last-modified' in resp.headers: + headers['If-Modified-Since'] = resp.headers['Last-Modified'] + + # return the original handler + return False + + def add_headers(self, url): + resp = self.cache.get(url) + if resp and 'etag' in resp.headers: + return {'If-None-Match': resp.headers['etag']} + return {} + + def cache_response(self, request, resp): + """ + Algorithm for caching requests. + + This assumes a requests Response object. + """ + # From httplib2: Don't cache 206's since we aren't going to + # handle byte range requests + if resp.status_code not in [200, 203]: + return + + cc_req = self.parse_cache_control(request.headers) + cc = self.parse_cache_control(resp.headers) + + cache_url = self.cache_url(request.url) + + # Delete it from the cache if we happen to have it stored there + no_store = cc.get('no-store') or cc_req.get('no-store') + if no_store and self.cache.get(cache_url): + self.cache.delete(cache_url) + + # If we've been given an etag, then keep the response + if self.cache_etags and 'etag' in resp.headers: + self.cache.set(cache_url, resp) + + # Add to the cache if the response headers demand it. If there + # is no date header then we can't do anything about expiring + # the cache. 
+ elif 'date' in resp.headers: + # cache when there is a max-age > 0 + if cc and cc.get('max-age'): + if int(cc['max-age']) > 0: + self.cache.set(cache_url, resp) + + # If the request can expire, it means we should cache it + # in the meantime. + elif 'expires' in resp.headers: + if resp.headers['expires']: + self.cache.set(cache_url, resp) + + def update_cached_response(self, request, response): + """On a 304 we will get a new set of headers that we want to + update our cached value with, assuming we have one. + + This should only ever be called when we've sent an ETag and + gotten a 304 as the response. + """ + cache_url = self.cache_url(request.url) + + resp = self.cache.get(cache_url) + + if not resp: + # we didn't have a cached response + return response + + # did so lets update our headers + resp.headers.update(resp.headers) + + # we want a 200 b/c we have content via the cache + request.status_code = 200 + + # update the request as it has the if-none-match header + any + # other headers that the server might have updated (ie Date, + # Cache-Control, Expires, etc.) + resp.request = request + + # update our cache + self.cache.set(cache_url, resp) + + # Let everyone know this was from the cache. + resp.from_cache = True + + return resp diff --git a/lib/cachecontrol/patch_requests.py b/lib/cachecontrol/patch_requests.py new file mode 100644 index 00000000..cad60e17 --- /dev/null +++ b/lib/cachecontrol/patch_requests.py @@ -0,0 +1,56 @@ +import requests + +from requests import models +from requests.packages.urllib3.response import HTTPResponse + +__attrs__ = [ + '_content', + 'status_code', + 'headers', + 'url', + 'history', + 'encoding', + 'reason', + 'cookies', + 'elapsed', +] + + +def response_getstate(self): + # consume everything + if not self._content_consumed: + self.content + + state = dict( + (attr, getattr(self, attr, None)) + for attr in __attrs__ + ) + + # deal with our raw content b/c we need it for our cookie jar + state['raw_original_response'] = self.raw._original_response + return state + + +def response_setstate(self, state): + for name, value in state.items(): + if name != 'raw_original_response': + setattr(self, name, value) + + setattr(self, 'raw', HTTPResponse()) + self.raw._original_response = state['raw_original_response'] + + +def make_responses_pickleable(): + try: + version_parts = [int(part) for part in requests.__version__.split('.')] + + # must be >= 2.2.x + if not version_parts[0] >= 2 or not version_parts[1] >= 2: + models.Response.__getstate__ = response_getstate + models.Response.__setstate__ = response_setstate + except: + raise + pass + + +make_responses_pickleable() diff --git a/lib/cachecontrol/wrapper.py b/lib/cachecontrol/wrapper.py new file mode 100644 index 00000000..d32f60a6 --- /dev/null +++ b/lib/cachecontrol/wrapper.py @@ -0,0 +1,10 @@ +from cachecontrol.adapter import CacheControlAdapter +from cachecontrol.cache import DictCache + + +def CacheControl(sess, cache=None, cache_etags=True): + cache = cache or DictCache() + adapter = CacheControlAdapter(cache, cache_etags=cache_etags) + sess.mount('http://', adapter) + + return sess diff --git a/lib/dateutil/zoneinfo/zoneinfo-2013i.tar.gz b/lib/dateutil/zoneinfo/zoneinfo-2013i.tar.gz deleted file mode 100644 index 2e30670c..00000000 Binary files a/lib/dateutil/zoneinfo/zoneinfo-2013i.tar.gz and /dev/null differ diff --git a/lib/lockfile/__init__.py b/lib/lockfile/__init__.py new file mode 100644 index 00000000..668b426f --- /dev/null +++ b/lib/lockfile/__init__.py @@ -0,0 +1,317 @@ +""" 
+lockfile.py - Platform-independent advisory file locks. + +Requires Python 2.5 unless you apply 2.4.diff +Locking is done on a per-thread basis instead of a per-process basis. + +Usage: + +>>> lock = LockFile('somefile') +>>> try: +... lock.acquire() +... except AlreadyLocked: +... print 'somefile', 'is locked already.' +... except LockFailed: +... print 'somefile', 'can\\'t be locked.' +... else: +... print 'got lock' +got lock +>>> print lock.is_locked() +True +>>> lock.release() + +>>> lock = LockFile('somefile') +>>> print lock.is_locked() +False +>>> with lock: +... print lock.is_locked() +True +>>> print lock.is_locked() +False + +>>> lock = LockFile('somefile') +>>> # It is okay to lock twice from the same thread... +>>> with lock: +... lock.acquire() +... +>>> # Though no counter is kept, so you can't unlock multiple times... +>>> print lock.is_locked() +False + +Exceptions: + + Error - base class for other exceptions + LockError - base class for all locking exceptions + AlreadyLocked - Another thread or process already holds the lock + LockFailed - Lock failed for some other reason + UnlockError - base class for all unlocking exceptions + AlreadyUnlocked - File was not locked. + NotMyLock - File was locked but not by the current thread/process +""" + +from __future__ import absolute_import + +import sys +import socket +import os +import threading +import time +import urllib +import warnings +import functools + +# Work with PEP8 and non-PEP8 versions of threading module. +if not hasattr(threading, "current_thread"): + threading.current_thread = threading.currentThread +if not hasattr(threading.Thread, "get_name"): + threading.Thread.get_name = threading.Thread.getName + +__all__ = ['Error', 'LockError', 'LockTimeout', 'AlreadyLocked', + 'LockFailed', 'UnlockError', 'NotLocked', 'NotMyLock', + 'LinkLockFile', 'MkdirLockFile', 'SQLiteLockFile', + 'LockBase', 'locked'] + +class Error(Exception): + """ + Base class for other exceptions. + + >>> try: + ... raise Error + ... except Exception: + ... pass + """ + pass + +class LockError(Error): + """ + Base class for error arising from attempts to acquire the lock. + + >>> try: + ... raise LockError + ... except Error: + ... pass + """ + pass + +class LockTimeout(LockError): + """Raised when lock creation fails within a user-defined period of time. + + >>> try: + ... raise LockTimeout + ... except LockError: + ... pass + """ + pass + +class AlreadyLocked(LockError): + """Some other thread/process is locking the file. + + >>> try: + ... raise AlreadyLocked + ... except LockError: + ... pass + """ + pass + +class LockFailed(LockError): + """Lock file creation failed for some other reason. + + >>> try: + ... raise LockFailed + ... except LockError: + ... pass + """ + pass + +class UnlockError(Error): + """ + Base class for errors arising from attempts to release the lock. + + >>> try: + ... raise UnlockError + ... except Error: + ... pass + """ + pass + +class NotLocked(UnlockError): + """Raised when an attempt is made to unlock an unlocked file. + + >>> try: + ... raise NotLocked + ... except UnlockError: + ... pass + """ + pass + +class NotMyLock(UnlockError): + """Raised when an attempt is made to unlock a file someone else locked. + + >>> try: + ... raise NotMyLock + ... except UnlockError: + ... 
pass + """ + pass + +class LockBase: + """Base class for platform-specific lock classes.""" + def __init__(self, path, threaded=True, timeout=None): + """ + >>> lock = LockBase('somefile') + >>> lock = LockBase('somefile', threaded=False) + """ + self.path = path + self.lock_file = os.path.abspath(path) + ".lock" + self.hostname = socket.gethostname() + self.pid = os.getpid() + if threaded: + t = threading.current_thread() + # Thread objects in Python 2.4 and earlier do not have ident + # attrs. Worm around that. + ident = getattr(t, "ident", hash(t)) + self.tname = "-%x" % (ident & 0xffffffff) + else: + self.tname = "" + dirname = os.path.dirname(self.lock_file) + self.unique_name = os.path.join(dirname, + "%s%s.%s" % (self.hostname, + self.tname, + self.pid)) + self.timeout = timeout + + def acquire(self, timeout=None): + """ + Acquire the lock. + + * If timeout is omitted (or None), wait forever trying to lock the + file. + + * If timeout > 0, try to acquire the lock for that many seconds. If + the lock period expires and the file is still locked, raise + LockTimeout. + + * If timeout <= 0, raise AlreadyLocked immediately if the file is + already locked. + """ + raise NotImplemented("implement in subclass") + + def release(self): + """ + Release the lock. + + If the file is not locked, raise NotLocked. + """ + raise NotImplemented("implement in subclass") + + def is_locked(self): + """ + Tell whether or not the file is locked. + """ + raise NotImplemented("implement in subclass") + + def i_am_locking(self): + """ + Return True if this object is locking the file. + """ + raise NotImplemented("implement in subclass") + + def break_lock(self): + """ + Remove a lock. Useful if a locking thread failed to unlock. + """ + raise NotImplemented("implement in subclass") + + def __enter__(self): + """ + Context manager support. + """ + self.acquire() + return self + + def __exit__(self, *_exc): + """ + Context manager support. + """ + self.release() + + def __repr__(self): + return "<%s: %r -- %r>" % (self.__class__.__name__, self.unique_name, + self.path) + +def _fl_helper(cls, mod, *args, **kwds): + warnings.warn("Import from %s module instead of lockfile package" % mod, + DeprecationWarning, stacklevel=2) + # This is a bit funky, but it's only for awhile. The way the unit tests + # are constructed this function winds up as an unbound method, so it + # actually takes three args, not two. We want to toss out self. + if not isinstance(args[0], str): + # We are testing, avoid the first arg + args = args[1:] + if len(args) == 1 and not kwds: + kwds["threaded"] = True + return cls(*args, **kwds) + +def LinkFileLock(*args, **kwds): + """Factory function provided for backwards compatibility. + + Do not use in new code. Instead, import LinkLockFile from the + lockfile.linklockfile module. + """ + from . import linklockfile + return _fl_helper(linklockfile.LinkLockFile, "lockfile.linklockfile", + *args, **kwds) + +def MkdirFileLock(*args, **kwds): + """Factory function provided for backwards compatibility. + + Do not use in new code. Instead, import MkdirLockFile from the + lockfile.mkdirlockfile module. + """ + from . import mkdirlockfile + return _fl_helper(mkdirlockfile.MkdirLockFile, "lockfile.mkdirlockfile", + *args, **kwds) + +def SQLiteFileLock(*args, **kwds): + """Factory function provided for backwards compatibility. + + Do not use in new code. Instead, import SQLiteLockFile from the + lockfile.mkdirlockfile module. + """ + from . 
import sqlitelockfile + return _fl_helper(sqlitelockfile.SQLiteLockFile, "lockfile.sqlitelockfile", + *args, **kwds) + +def locked(path, timeout=None): + """Decorator which enables locks for decorated function. + + Arguments: + - path: path for lockfile. + - timeout (optional): Timeout for acquiring lock. + + Usage: + @locked('/var/run/myname', timeout=0) + def myname(...): + ... + """ + def decor(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + lock = FileLock(path, timeout=timeout) + lock.acquire() + try: + return func(*args, **kwargs) + finally: + lock.release() + return wrapper + return decor + +if hasattr(os, "link"): + from . import linklockfile as _llf + LockFile = _llf.LinkLockFile +else: + from . import mkdirlockfile as _mlf + LockFile = _mlf.MkdirLockFile + +FileLock = LockFile + diff --git a/lib/lockfile/linklockfile.py b/lib/lockfile/linklockfile.py new file mode 100644 index 00000000..9c506734 --- /dev/null +++ b/lib/lockfile/linklockfile.py @@ -0,0 +1,73 @@ +from __future__ import absolute_import + +import time +import os + +from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout, + AlreadyLocked) + +class LinkLockFile(LockBase): + """Lock access to a file using atomic property of link(2). + + >>> lock = LinkLockFile('somefile') + >>> lock = LinkLockFile('somefile', threaded=False) + """ + + def acquire(self, timeout=None): + try: + open(self.unique_name, "wb").close() + except IOError: + raise LockFailed("failed to create %s" % self.unique_name) + + timeout = timeout is not None and timeout or self.timeout + end_time = time.time() + if timeout is not None and timeout > 0: + end_time += timeout + + while True: + # Try and create a hard link to it. + try: + os.link(self.unique_name, self.lock_file) + except OSError: + # Link creation failed. Maybe we've double-locked? + nlinks = os.stat(self.unique_name).st_nlink + if nlinks == 2: + # The original link plus the one I created == 2. We're + # good to go. + return + else: + # Otherwise the lock creation failed. + if timeout is not None and time.time() > end_time: + os.unlink(self.unique_name) + if timeout > 0: + raise LockTimeout("Timeout waiting to acquire" + " lock for %s" % + self.path) + else: + raise AlreadyLocked("%s is already locked" % + self.path) + time.sleep(timeout is not None and timeout/10 or 0.1) + else: + # Link creation succeeded. We're good to go. + return + + def release(self): + if not self.is_locked(): + raise NotLocked("%s is not locked" % self.path) + elif not os.path.exists(self.unique_name): + raise NotMyLock("%s is locked, but not by me" % self.path) + os.unlink(self.unique_name) + os.unlink(self.lock_file) + + def is_locked(self): + return os.path.exists(self.lock_file) + + def i_am_locking(self): + return (self.is_locked() and + os.path.exists(self.unique_name) and + os.stat(self.unique_name).st_nlink == 2) + + def break_lock(self): + if os.path.exists(self.lock_file): + os.unlink(self.lock_file) + diff --git a/lib/lockfile/mkdirlockfile.py b/lib/lockfile/mkdirlockfile.py new file mode 100644 index 00000000..8d2c801f --- /dev/null +++ b/lib/lockfile/mkdirlockfile.py @@ -0,0 +1,83 @@ +from __future__ import absolute_import, division + +import time +import os +import sys +import errno + +from . 
import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout, + AlreadyLocked) + +class MkdirLockFile(LockBase): + """Lock file by creating a directory.""" + def __init__(self, path, threaded=True, timeout=None): + """ + >>> lock = MkdirLockFile('somefile') + >>> lock = MkdirLockFile('somefile', threaded=False) + """ + LockBase.__init__(self, path, threaded, timeout) + # Lock file itself is a directory. Place the unique file name into + # it. + self.unique_name = os.path.join(self.lock_file, + "%s.%s%s" % (self.hostname, + self.tname, + self.pid)) + + def acquire(self, timeout=None): + timeout = timeout is not None and timeout or self.timeout + end_time = time.time() + if timeout is not None and timeout > 0: + end_time += timeout + + if timeout is None: + wait = 0.1 + else: + wait = max(0, timeout / 10) + + while True: + try: + os.mkdir(self.lock_file) + except OSError: + err = sys.exc_info()[1] + if err.errno == errno.EEXIST: + # Already locked. + if os.path.exists(self.unique_name): + # Already locked by me. + return + if timeout is not None and time.time() > end_time: + if timeout > 0: + raise LockTimeout("Timeout waiting to acquire" + " lock for %s" % + self.path) + else: + # Someone else has the lock. + raise AlreadyLocked("%s is already locked" % + self.path) + time.sleep(wait) + else: + # Couldn't create the lock for some other reason + raise LockFailed("failed to create %s" % self.lock_file) + else: + open(self.unique_name, "wb").close() + return + + def release(self): + if not self.is_locked(): + raise NotLocked("%s is not locked" % self.path) + elif not os.path.exists(self.unique_name): + raise NotMyLock("%s is locked, but not by me" % self.path) + os.unlink(self.unique_name) + os.rmdir(self.lock_file) + + def is_locked(self): + return os.path.exists(self.lock_file) + + def i_am_locking(self): + return (self.is_locked() and + os.path.exists(self.unique_name)) + + def break_lock(self): + if os.path.exists(self.lock_file): + for name in os.listdir(self.lock_file): + os.unlink(os.path.join(self.lock_file, name)) + os.rmdir(self.lock_file) diff --git a/lib/lockfile/pidlockfile.py b/lib/lockfile/pidlockfile.py new file mode 100644 index 00000000..e92f9ead --- /dev/null +++ b/lib/lockfile/pidlockfile.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- + +# pidlockfile.py +# +# Copyright © 2008–2009 Ben Finney +# +# This is free software: you may copy, modify, and/or distribute this work +# under the terms of the Python Software Foundation License, version 2 or +# later as published by the Python Software Foundation. +# No warranty expressed or implied. See the file LICENSE.PSF-2 for details. + +""" Lockfile behaviour implemented via Unix PID files. + """ + +from __future__ import absolute_import + +import os +import sys +import errno +import time + +from . import (LockBase, AlreadyLocked, LockFailed, NotLocked, NotMyLock, + LockTimeout) + + +class PIDLockFile(LockBase): + """ Lockfile implemented as a Unix PID file. + + The lock file is a normal file named by the attribute `path`. + A lock's PID file contains a single line of text, containing + the process ID (PID) of the process that acquired the lock. + + >>> lock = PIDLockFile('somefile') + >>> lock = PIDLockFile('somefile') + """ + + def __init__(self, path, threaded=False, timeout=None): + # pid lockfiles don't support threaded operation, so always force + # False as the threaded arg. 
+ LockBase.__init__(self, path, False, timeout) + dirname = os.path.dirname(self.lock_file) + basename = os.path.split(self.path)[-1] + self.unique_name = self.path + + def read_pid(self): + """ Get the PID from the lock file. + """ + return read_pid_from_pidfile(self.path) + + def is_locked(self): + """ Test if the lock is currently held. + + The lock is held if the PID file for this lock exists. + + """ + return os.path.exists(self.path) + + def i_am_locking(self): + """ Test if the lock is held by the current process. + + Returns ``True`` if the current process ID matches the + number stored in the PID file. + """ + return self.is_locked() and os.getpid() == self.read_pid() + + def acquire(self, timeout=None): + """ Acquire the lock. + + Creates the PID file for this lock, or raises an error if + the lock could not be acquired. + """ + + timeout = timeout is not None and timeout or self.timeout + end_time = time.time() + if timeout is not None and timeout > 0: + end_time += timeout + + while True: + try: + write_pid_to_pidfile(self.path) + except OSError as exc: + if exc.errno == errno.EEXIST: + # The lock creation failed. Maybe sleep a bit. + if timeout is not None and time.time() > end_time: + if timeout > 0: + raise LockTimeout("Timeout waiting to acquire" + " lock for %s" % + self.path) + else: + raise AlreadyLocked("%s is already locked" % + self.path) + time.sleep(timeout is not None and timeout/10 or 0.1) + else: + raise LockFailed("failed to create %s" % self.path) + else: + return + + def release(self): + """ Release the lock. + + Removes the PID file to release the lock, or raises an + error if the current process does not hold the lock. + + """ + if not self.is_locked(): + raise NotLocked("%s is not locked" % self.path) + if not self.i_am_locking(): + raise NotMyLock("%s is locked, but not by me" % self.path) + remove_existing_pidfile(self.path) + + def break_lock(self): + """ Break an existing lock. + + Removes the PID file if it already exists, otherwise does + nothing. + + """ + remove_existing_pidfile(self.path) + +def read_pid_from_pidfile(pidfile_path): + """ Read the PID recorded in the named PID file. + + Read and return the numeric PID recorded as text in the named + PID file. If the PID file cannot be read, or if the content is + not a valid PID, return ``None``. + + """ + pid = None + try: + pidfile = open(pidfile_path, 'r') + except IOError: + pass + else: + # According to the FHS 2.3 section on PID files in /var/run: + # + # The file must consist of the process identifier in + # ASCII-encoded decimal, followed by a newline character. + # + # Programs that read PID files should be somewhat flexible + # in what they accept; i.e., they should ignore extra + # whitespace, leading zeroes, absence of the trailing + # newline, or additional lines in the PID file. + + line = pidfile.readline().strip() + try: + pid = int(line) + except ValueError: + pass + pidfile.close() + + return pid + + +def write_pid_to_pidfile(pidfile_path): + """ Write the PID in the named PID file. + + Get the numeric process ID (“PID”) of the current process + and write it to the named file as a line of text. + + """ + open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY) + open_mode = 0o644 + pidfile_fd = os.open(pidfile_path, open_flags, open_mode) + pidfile = os.fdopen(pidfile_fd, 'w') + + # According to the FHS 2.3 section on PID files in /var/run: + # + # The file must consist of the process identifier in + # ASCII-encoded decimal, followed by a newline character. 
For + # example, if crond was process number 25, /var/run/crond.pid + # would contain three characters: two, five, and newline. + + pid = os.getpid() + line = "%(pid)d\n" % vars() + pidfile.write(line) + pidfile.close() + + +def remove_existing_pidfile(pidfile_path): + """ Remove the named PID file if it exists. + + Removing a PID file that doesn't already exist puts us in the + desired state, so we ignore the condition if the file does not + exist. + + """ + try: + os.remove(pidfile_path) + except OSError as exc: + if exc.errno == errno.ENOENT: + pass + else: + raise diff --git a/lib/lockfile/sqlitelockfile.py b/lib/lockfile/sqlitelockfile.py new file mode 100644 index 00000000..7dee4a85 --- /dev/null +++ b/lib/lockfile/sqlitelockfile.py @@ -0,0 +1,155 @@ +from __future__ import absolute_import, division + +import time +import os + +try: + unicode +except NameError: + unicode = str + +from . import LockBase, NotLocked, NotMyLock, LockTimeout, AlreadyLocked + +class SQLiteLockFile(LockBase): + "Demonstrate SQL-based locking." + + testdb = None + + def __init__(self, path, threaded=True, timeout=None): + """ + >>> lock = SQLiteLockFile('somefile') + >>> lock = SQLiteLockFile('somefile', threaded=False) + """ + LockBase.__init__(self, path, threaded, timeout) + self.lock_file = unicode(self.lock_file) + self.unique_name = unicode(self.unique_name) + + if SQLiteLockFile.testdb is None: + import tempfile + _fd, testdb = tempfile.mkstemp() + os.close(_fd) + os.unlink(testdb) + del _fd, tempfile + SQLiteLockFile.testdb = testdb + + import sqlite3 + self.connection = sqlite3.connect(SQLiteLockFile.testdb) + + c = self.connection.cursor() + try: + c.execute("create table locks" + "(" + " lock_file varchar(32)," + " unique_name varchar(32)" + ")") + except sqlite3.OperationalError: + pass + else: + self.connection.commit() + import atexit + atexit.register(os.unlink, SQLiteLockFile.testdb) + + def acquire(self, timeout=None): + timeout = timeout is not None and timeout or self.timeout + end_time = time.time() + if timeout is not None and timeout > 0: + end_time += timeout + + if timeout is None: + wait = 0.1 + elif timeout <= 0: + wait = 0 + else: + wait = timeout / 10 + + cursor = self.connection.cursor() + + while True: + if not self.is_locked(): + # Not locked. Try to lock it. + cursor.execute("insert into locks" + " (lock_file, unique_name)" + " values" + " (?, ?)", + (self.lock_file, self.unique_name)) + self.connection.commit() + + # Check to see if we are the only lock holder. + cursor.execute("select * from locks" + " where unique_name = ?", + (self.unique_name,)) + rows = cursor.fetchall() + if len(rows) > 1: + # Nope. Someone else got there. Remove our lock. + cursor.execute("delete from locks" + " where unique_name = ?", + (self.unique_name,)) + self.connection.commit() + else: + # Yup. We're done, so go home. + return + else: + # Check to see if we are the only lock holder. + cursor.execute("select * from locks" + " where unique_name = ?", + (self.unique_name,)) + rows = cursor.fetchall() + if len(rows) == 1: + # We're the locker, so go home. + return + + # Maybe we should wait a bit longer. + if timeout is not None and time.time() > end_time: + if timeout > 0: + # No more waiting. + raise LockTimeout("Timeout waiting to acquire" + " lock for %s" % + self.path) + else: + # Someone else has the lock and we are impatient.. + raise AlreadyLocked("%s is already locked" % self.path) + + # Well, okay. We'll give it a bit longer. 
+ time.sleep(wait) + + def release(self): + if not self.is_locked(): + raise NotLocked("%s is not locked" % self.path) + if not self.i_am_locking(): + raise NotMyLock("%s is locked, but not by me (by %s)" % + (self.unique_name, self._who_is_locking())) + cursor = self.connection.cursor() + cursor.execute("delete from locks" + " where unique_name = ?", + (self.unique_name,)) + self.connection.commit() + + def _who_is_locking(self): + cursor = self.connection.cursor() + cursor.execute("select unique_name from locks" + " where lock_file = ?", + (self.lock_file,)) + return cursor.fetchone()[0] + + def is_locked(self): + cursor = self.connection.cursor() + cursor.execute("select * from locks" + " where lock_file = ?", + (self.lock_file,)) + rows = cursor.fetchall() + return not not rows + + def i_am_locking(self): + cursor = self.connection.cursor() + cursor.execute("select * from locks" + " where lock_file = ?" + " and unique_name = ?", + (self.lock_file, self.unique_name)) + return not not cursor.fetchall() + + def break_lock(self): + cursor = self.connection.cursor() + cursor.execute("delete from locks" + " where lock_file = ?", + (self.lock_file,)) + self.connection.commit() diff --git a/lib/lockfile/symlinklockfile.py b/lib/lockfile/symlinklockfile.py new file mode 100644 index 00000000..57551a36 --- /dev/null +++ b/lib/lockfile/symlinklockfile.py @@ -0,0 +1,69 @@ +from __future__ import absolute_import + +import time +import os + +from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout, + AlreadyLocked) + +class SymlinkLockFile(LockBase): + """Lock access to a file using symlink(2).""" + + def __init__(self, path, threaded=True, timeout=None): + # super(SymlinkLockFile).__init(...) + LockBase.__init__(self, path, threaded, timeout) + # split it back! + self.unique_name = os.path.split(self.unique_name)[1] + + def acquire(self, timeout=None): + # Hopefully unnecessary for symlink. + #try: + # open(self.unique_name, "wb").close() + #except IOError: + # raise LockFailed("failed to create %s" % self.unique_name) + timeout = timeout is not None and timeout or self.timeout + end_time = time.time() + if timeout is not None and timeout > 0: + end_time += timeout + + while True: + # Try and create a symbolic link to it. + try: + os.symlink(self.unique_name, self.lock_file) + except OSError: + # Link creation failed. Maybe we've double-locked? + if self.i_am_locking(): + # Linked to out unique name. Proceed. + return + else: + # Otherwise the lock creation failed. + if timeout is not None and time.time() > end_time: + if timeout > 0: + raise LockTimeout("Timeout waiting to acquire" + " lock for %s" % + self.path) + else: + raise AlreadyLocked("%s is already locked" % + self.path) + time.sleep(timeout/10 if timeout is not None else 0.1) + else: + # Link creation succeeded. We're good to go. 
+ return + + def release(self): + if not self.is_locked(): + raise NotLocked("%s is not locked" % self.path) + elif not self.i_am_locking(): + raise NotMyLock("%s is locked, but not by me" % self.path) + os.unlink(self.lock_file) + + def is_locked(self): + return os.path.islink(self.lock_file) + + def i_am_locking(self): + return os.path.islink(self.lock_file) and \ + os.readlink(self.lock_file) == self.unique_name + + def break_lock(self): + if os.path.islink(self.lock_file): # exists && link + os.unlink(self.lock_file) diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py index 90fcb033..bba19002 100644 --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -42,8 +42,8 @@ is at . """ __title__ = 'requests' -__version__ = '2.2.0' -__build__ = 0x020200 +__version__ = '2.3.0' +__build__ = 0x020300 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2014 Kenneth Reitz' diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index dd10e959..28bea07c 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -310,10 +310,7 @@ class HTTPAdapter(BaseAdapter): chunked = not (request.body is None or 'Content-Length' in request.headers) - if stream: - timeout = TimeoutSauce(connect=timeout) - else: - timeout = TimeoutSauce(connect=timeout, read=timeout) + timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: @@ -372,19 +369,19 @@ class HTTPAdapter(BaseAdapter): conn._put_conn(low_conn) except socket.error as sockerr: - raise ConnectionError(sockerr) + raise ConnectionError(sockerr, request=request) except MaxRetryError as e: - raise ConnectionError(e) + raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): - raise SSLError(e) + raise SSLError(e, request=request) elif isinstance(e, TimeoutError): - raise Timeout(e) + raise Timeout(e, request=request) else: raise diff --git a/lib/requests/api.py b/lib/requests/api.py index baf43dd6..01d853d5 100644 --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -26,7 +26,7 @@ def request(method, url, **kwargs): :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. + :param timeout: (optional) Float describing the timeout of the request in seconds. :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. 
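Illustrative note (not part of the diff): with the adapters.py change above, a single numeric timeout now covers both the connect and the read phase, whether or not stream=True is used. A minimal sketch of the documented behaviour, using an arbitrary example URL:

    import requests

    # timeout is expressed in seconds and now applies to both connect and read
    resp = requests.get('http://httpbin.org/get', timeout=5.0)
    print(resp.status_code)
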
diff --git a/lib/requests/auth.py b/lib/requests/auth.py index 6664cd80..9f831b7a 100644 --- a/lib/requests/auth.py +++ b/lib/requests/auth.py @@ -11,7 +11,6 @@ import os import re import time import hashlib -import logging from base64 import b64encode @@ -19,8 +18,6 @@ from .compat import urlparse, str from .cookies import extract_cookies_to_jar from .utils import parse_dict_header -log = logging.getLogger(__name__) - CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' diff --git a/lib/requests/certs.py b/lib/requests/certs.py index 57229101..bc008261 100644 --- a/lib/requests/certs.py +++ b/lib/requests/certs.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # -*- coding: utf-8 -*- """ diff --git a/lib/requests/exceptions.py b/lib/requests/exceptions.py index cd3c7600..a4ee9d63 100644 --- a/lib/requests/exceptions.py +++ b/lib/requests/exceptions.py @@ -14,15 +14,22 @@ class RequestException(IOError): """There was an ambiguous exception that occurred while handling your request.""" + def __init__(self, *args, **kwargs): + """ + Initialize RequestException with `request` and `response` objects. + """ + response = kwargs.pop('response', None) + self.response = response + self.request = kwargs.pop('request', None) + if (response is not None and not self.request and + hasattr(response, 'request')): + self.request = self.response.request + super(RequestException, self).__init__(*args, **kwargs) + class HTTPError(RequestException): """An HTTP error occurred.""" - def __init__(self, *args, **kwargs): - """ Initializes HTTPError with optional `response` object. """ - self.response = kwargs.pop('response', None) - super(HTTPError, self).__init__(*args, **kwargs) - class ConnectionError(RequestException): """A Connection error occurred.""" diff --git a/lib/requests/models.py b/lib/requests/models.py index ae46a831..e2fa09f8 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -8,7 +8,6 @@ This module contains the primary objects that power Requests. """ import collections -import logging import datetime from io import BytesIO, UnsupportedOperation @@ -31,12 +30,20 @@ from .utils import ( from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, is_py2, chardet, json, builtin_str, basestring, IncompleteRead) +from .status_codes import codes +#: The set of HTTP status codes that indicate an automatically +#: processable redirect. +REDIRECT_STATI = ( + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 + codes.temporary_moved, # 307 +) +DEFAULT_REDIRECT_LIMIT = 30 CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 -log = logging.getLogger(__name__) - class RequestEncodingMixin(object): @property @@ -517,7 +524,7 @@ class Response(object): self._content = False self._content_consumed = False - #: Integer Code of responded HTTP Status. + #: Integer Code of responded HTTP Status, e.g. 404 or 200. self.status_code = None #: Case-insensitive Dictionary of Response Headers. @@ -541,6 +548,7 @@ class Response(object): #: up here. The list is sorted from the oldest to the most recent request. self.history = [] + #: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK". self.reason = None #: A CookieJar of Cookies the server sent back. 
@@ -567,6 +575,7 @@ class Response(object): # pickled objects do not have .raw setattr(self, '_content_consumed', True) + setattr(self, 'raw', None) def __repr__(self): return '' % (self.status_code) @@ -591,10 +600,16 @@ class Response(object): return False return True + @property + def is_redirect(self): + """True if this Response is a well-formed HTTP redirect that could have + been processed automatically (by :meth:`Session.resolve_redirects`). + """ + return ('location' in self.headers and self.status_code in REDIRECT_STATI) + @property def apparent_encoding(self): - """The apparent encoding, provided by the lovely Charade library - (Thanks, Ian!).""" + """The apparent encoding, provided by the chardet library""" return chardet.detect(self.content)['encoding'] def iter_content(self, chunk_size=1, decode_unicode=False): @@ -612,8 +627,7 @@ class Response(object): try: # Special case for urllib3. try: - for chunk in self.raw.stream(chunk_size, - decode_content=True): + for chunk in self.raw.stream(chunk_size, decode_content=True): yield chunk except IncompleteRead as e: raise ChunkedEncodingError(e) @@ -644,8 +658,7 @@ class Response(object): pending = None - for chunk in self.iter_content(chunk_size=chunk_size, - decode_unicode=decode_unicode): + for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode): if pending is not None: chunk = pending + chunk @@ -693,7 +706,7 @@ class Response(object): If Response.encoding is None, encoding will be guessed using ``chardet``. - The encoding of the response content is determined based soley on HTTP + The encoding of the response content is determined based solely on HTTP headers, following RFC 2616 to the letter. If you can take advantage of non-HTTP knowledge to make a better guess at the encoding, you should set ``r.encoding`` appropriately before accessing this property. @@ -737,7 +750,14 @@ class Response(object): # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: - return json.loads(self.content.decode(encoding), **kwargs) + try: + return json.loads(self.content.decode(encoding), **kwargs) + except UnicodeDecodeError: + # Wrong UTF codec detected; usually because it's not UTF-8 + # but some other 8-bit codec. This is an RFC violation, + # and the server didn't bother to tell us what codec *was* + # used. + pass return json.loads(self.text, **kwargs) @property diff --git a/lib/requests/packages/chardet/chardetect.py b/lib/requests/packages/chardet/chardetect.py index 31010659..ecd0163b 100644 --- a/lib/requests/packages/chardet/chardetect.py +++ b/lib/requests/packages/chardet/chardetect.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python """ Script which takes one or more file paths and reports on their detected encodings diff --git a/lib/requests/packages/urllib3/connection.py b/lib/requests/packages/urllib3/connection.py index c3f302d3..21247745 100644 --- a/lib/requests/packages/urllib3/connection.py +++ b/lib/requests/packages/urllib3/connection.py @@ -8,9 +8,9 @@ import socket from socket import timeout as SocketTimeout try: # Python 3 - from http.client import HTTPConnection, HTTPException + from http.client import HTTPConnection as _HTTPConnection, HTTPException except ImportError: - from httplib import HTTPConnection, HTTPException + from httplib import HTTPConnection as _HTTPConnection, HTTPException class DummyConnection(object): "Used to detect a failed ConnectionCls import." @@ -24,9 +24,9 @@ try: # Compiled with SSL? 
pass try: # Python 3 - from http.client import HTTPSConnection + from http.client import HTTPSConnection as _HTTPSConnection except ImportError: - from httplib import HTTPSConnection + from httplib import HTTPSConnection as _HTTPSConnection import ssl BaseSSLError = ssl.SSLError @@ -45,6 +45,69 @@ from .util import ( ssl_wrap_socket, ) + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + + +class HTTPConnection(_HTTPConnection, object): + default_port = port_by_scheme['http'] + + # By default, disable Nagle's Algorithm. + tcp_nodelay = 1 + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it + + :return: a new socket connection + """ + try: + conn = socket.create_connection( + (self.host, self.port), + self.timeout, + self.source_address, + ) + except AttributeError: # Python 2.6 + conn = socket.create_connection( + (self.host, self.port), + self.timeout, + ) + conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) + return conn + + def _prepare_conn(self, conn): + self.sock = conn + if self._tunnel_host: + # TODO: Fix tunnel so it doesn't depend on self.sock state. + self._tunnel() + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + try: + HTTPConnection.__init__(self, host, port, strict, timeout, source_address) + except TypeError: # Python 2.6 + HTTPConnection.__init__(self, host, port, strict, timeout) + self.key_file = key_file + self.cert_file = cert_file + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) + + class VerifiedHTTPSConnection(HTTPSConnection): """ Based on httplib.HTTPSConnection but wraps the socket with @@ -73,9 +136,12 @@ class VerifiedHTTPSConnection(HTTPSConnection): timeout=self.timeout, ) except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) @@ -107,4 +173,6 @@ class VerifiedHTTPSConnection(HTTPSConnection): if ssl: + # Make a copy for testing. 
+ UnverifiedHTTPSConnection = HTTPSConnection HTTPSConnection = VerifiedHTTPSConnection diff --git a/lib/requests/packages/urllib3/connectionpool.py b/lib/requests/packages/urllib3/connectionpool.py index 44ecffd0..243d700e 100644 --- a/lib/requests/packages/urllib3/connectionpool.py +++ b/lib/requests/packages/urllib3/connectionpool.py @@ -31,6 +31,7 @@ from .exceptions import ( from .packages.ssl_match_hostname import CertificateError from .packages import six from .connection import ( + port_by_scheme, DummyConnection, HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, HTTPException, BaseSSLError, @@ -51,12 +52,6 @@ log = logging.getLogger(__name__) _Default = object() -port_by_scheme = { - 'http': 80, - 'https': 443, -} - - ## Pool objects class ConnectionPool(object): @@ -169,7 +164,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPConnection`. + Return a fresh :class:`HTTPConnection`. """ self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % @@ -179,9 +174,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not six.PY3: # Python 2 extra_params['strict'] = self.strict - return self.ConnectionCls(host=self.host, port=self.port, + conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + return conn def _get_conn(self, timeout=None): """ @@ -260,7 +260,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ - Perform a request on a given httplib connection object taken from our + Perform a request on a given urllib connection object taken from our pool. :param conn: @@ -517,17 +517,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise except (HTTPException, SocketError) as e: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) - # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below err = e if retries == 0: - raise MaxRetryError(self, url, e) + if isinstance(e, SocketError) and self.proxy is not None: + raise ProxyError('Cannot connect to proxy. ' + 'Socket error: %s.' % e) + else: + raise MaxRetryError(self, url, e) finally: if release_conn: @@ -565,7 +565,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`httplib.HTTPSConnection`. + instead of :class:`.HTTPSConnection`. :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. @@ -652,6 +652,10 @@ class HTTPSConnectionPool(HTTPConnectionPool): conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. 
+ conn.tcp_nodelay = 0 return self._prepare_conn(conn) diff --git a/lib/requests/packages/urllib3/contrib/pyopenssl.py b/lib/requests/packages/urllib3/contrib/pyopenssl.py index f78e7170..d9bda15a 100644 --- a/lib/requests/packages/urllib3/contrib/pyopenssl.py +++ b/lib/requests/packages/urllib3/contrib/pyopenssl.py @@ -1,4 +1,4 @@ -'''SSL with SNI-support for Python 2. +'''SSL with SNI_-support for Python 2. This needs the following packages installed: @@ -18,12 +18,31 @@ your application begins using ``urllib3``, like this:: Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +encryption in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +Module Variables +---------------- + +:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. + Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256 + EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES + !MD5 !EXP !PSK !SRP !DSS'`` + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) + ''' from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName +from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder +from pyasn1.type import univ, constraint from socket import _fileobject import ssl import select @@ -50,6 +69,13 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } +# Default SSL/TLS cipher list. +# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/ +# configuring-apache-nginx-and-openssl-for-forward-secrecy +DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \ + 'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \ + 'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS' + orig_util_HAS_SNI = util.HAS_SNI orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket @@ -69,6 +95,17 @@ def extract_from_urllib3(): util.HAS_SNI = orig_util_HAS_SNI +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +class SubjectAltName(BaseSubjectAltName): + '''ASN.1 implementation for subjectAltNames support''' + + # There is no limit to how many SAN certificates a certificate may have, + # however this needs to have some limit so we'll set an arbitrarily high + # limit. + sizeSpec = univ.SequenceOf.sizeSpec + \ + constraint.ValueSizeConstraint(1, 1024) + + ### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. def get_subj_alt_name(peer_cert): # Search through extensions @@ -330,6 +367,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + ctx.set_options(OP_NO_COMPRESSION) + + # Set list of supported ciphersuites. 
+ ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) + cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) cnx.set_connect_state() diff --git a/lib/requests/packages/urllib3/filepost.py b/lib/requests/packages/urllib3/filepost.py index 4575582e..e8b30bdd 100644 --- a/lib/requests/packages/urllib3/filepost.py +++ b/lib/requests/packages/urllib3/filepost.py @@ -46,16 +46,15 @@ def iter_field_objects(fields): def iter_fields(fields): """ + .. deprecated:: 1.6 + Iterate over fields. - .. deprecated :: - - The addition of `~urllib3.fields.RequestField` makes this function - obsolete. Instead, use :func:`iter_field_objects`, which returns - `~urllib3.fields.RequestField` objects, instead. + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. Supports list of (k, v) tuples and dicts. - """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) diff --git a/lib/requests/packages/urllib3/poolmanager.py b/lib/requests/packages/urllib3/poolmanager.py index c16519f8..f18ff2bb 100644 --- a/lib/requests/packages/urllib3/poolmanager.py +++ b/lib/requests/packages/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -176,7 +176,7 @@ class ProxyManager(PoolManager): Behaves just like :class:`PoolManager`, but sends all requests through the defined proxy, using the CONNECT method for HTTPS URLs. - :param poxy_url: + :param proxy_url: The URL of the proxy to be used. 
:param proxy_headers: diff --git a/lib/requests/packages/urllib3/util.py b/lib/requests/packages/urllib3/util.py index 46a0c48d..bd266317 100644 --- a/lib/requests/packages/urllib3/util.py +++ b/lib/requests/packages/urllib3/util.py @@ -620,6 +620,11 @@ if SSLContext is not None: # Python 3.2+ """ context = SSLContext(ssl_version) context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + if ca_certs: try: context.load_verify_locations(ca_certs) diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py index 843e3ae1..425db22c 100644 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -15,9 +15,9 @@ from datetime import datetime from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) -from .models import Request, PreparedRequest +from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT from .hooks import default_hooks, dispatch_hook -from .utils import to_key_val_list, default_headers +from .utils import to_key_val_list, default_headers, to_native_string from .exceptions import TooManyRedirects, InvalidSchema from .structures import CaseInsensitiveDict @@ -26,13 +26,9 @@ from .adapters import HTTPAdapter from .utils import requote_uri, get_environ_proxies, get_netrc_auth from .status_codes import codes -REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 - codes.temporary_moved, # 307 -) -DEFAULT_REDIRECT_LIMIT = 30 + +# formerly defined here, reexposed here for backward compatibility +from .models import REDIRECT_STATI def merge_setting(request_setting, session_setting, dict_class=OrderedDict): @@ -63,6 +59,8 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): if v is None: del merged_setting[k] + merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None) + return merged_setting @@ -89,8 +87,7 @@ class SessionRedirectMixin(object): i = 0 - # ((resp.status_code is codes.see_other)) - while ('location' in resp.headers and resp.status_code in REDIRECT_STATI): + while resp.is_redirect: prepared_request = req.copy() resp.content # Consume socket so it can be released @@ -121,7 +118,7 @@ class SessionRedirectMixin(object): else: url = requote_uri(url) - prepared_request.url = url + prepared_request.url = to_native_string(url) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if (resp.status_code == codes.see_other and @@ -153,11 +150,24 @@ class SessionRedirectMixin(object): except KeyError: pass - extract_cookies_to_jar(prepared_request._cookies, - prepared_request, resp.raw) + extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw) prepared_request._cookies.update(self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) + if 'Authorization' in headers: + # If we get redirected to a new host, we should strip out any + # authentication headers. + original_parsed = urlparse(resp.request.url) + redirect_parsed = urlparse(url) + + if (original_parsed.hostname != redirect_parsed.hostname): + del headers['Authorization'] + + # .netrc might have more auth for us. 
+ new_auth = get_netrc_auth(url) if self.trust_env else None + if new_auth is not None: + prepared_request.prepare_auth(new_auth) + resp = self.send( prepared_request, stream=stream, @@ -291,7 +301,7 @@ class Session(SessionRedirectMixin): def request(self, method, url, params=None, data=None, - headers={'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'}, + headers=None, cookies=None, files=None, auth=None, @@ -320,7 +330,7 @@ class Session(SessionRedirectMixin): :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. :param timeout: (optional) Float describing the timeout of the - request. + request in seconds. :param allow_redirects: (optional) Boolean. Set to True by default. :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. @@ -467,8 +477,7 @@ class Session(SessionRedirectMixin): if not isinstance(request, PreparedRequest): raise ValueError('You can only send PreparedRequests.') - # Set up variables needed for resolve_redirects and dispatching of - # hooks + # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') timeout = kwargs.get('timeout') @@ -482,8 +491,10 @@ class Session(SessionRedirectMixin): # Start time (approximately) of the request start = datetime.utcnow() + # Send the request r = adapter.send(request, **kwargs) + # Total elapsed time of the request (approximately) r.elapsed = datetime.utcnow() - start @@ -492,15 +503,20 @@ class Session(SessionRedirectMixin): # Persist cookies if r.history: + # If the hooks create history then we want those cookies too for resp in r.history: extract_cookies_to_jar(self.cookies, resp.request, resp.raw) + extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. - gen = self.resolve_redirects(r, request, stream=stream, - timeout=timeout, verify=verify, cert=cert, - proxies=proxies) + gen = self.resolve_redirects(r, request, + stream=stream, + timeout=timeout, + verify=verify, + cert=cert, + proxies=proxies) # Resolve redirects if allowed. history = [resp for resp in gen] if allow_redirects else [] @@ -534,8 +550,10 @@ class Session(SessionRedirectMixin): """Registers a connection adapter to a prefix. 
Adapters are sorted in descending order by key length.""" + self.adapters[prefix] = adapter keys_to_move = [k for k in self.adapters if len(k) < len(prefix)] + for key in keys_to_move: self.adapters[key] = self.adapters.pop(key) diff --git a/lib/requests/utils.py b/lib/requests/utils.py index 7b7ff0a7..4d648bc5 100644 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -548,7 +548,7 @@ def default_user_agent(name="python-requests"): def default_headers(): return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), - 'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')), + 'Accept-Encoding': ', '.join(('gzip', 'deflate')), 'Accept': '*/*' }) diff --git a/lib/tvdb_api/tvdb_api.py b/lib/tvdb_api/tvdb_api.py index b431f2f6..9d9fcd53 100644 --- a/lib/tvdb_api/tvdb_api.py +++ b/lib/tvdb_api/tvdb_api.py @@ -20,7 +20,6 @@ __version__ = "1.9" import os import time import urllib -import urllib2 import getpass import StringIO import tempfile @@ -39,8 +38,11 @@ try: except ImportError: gzip = None +from lib import requests +from urlparse import urlparse, urlsplit +from lib.cachecontrol.wrapper import CacheControl +from lib.cachecontrol.caches.file_cache import FileCache -from tvdb_cache import CacheHandler from tvdb_ui import BaseUI, ConsoleUI from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound, @@ -435,26 +437,13 @@ class Tvdb: if cache is True: self.config['cache_enabled'] = True self.config['cache_location'] = self._getTempDir() - self.urlopener = urllib2.build_opener( - CacheHandler(self.config['cache_location']) - ) elif cache is False: self.config['cache_enabled'] = False - self.urlopener = urllib2.build_opener() # default opener with no caching elif isinstance(cache, basestring): self.config['cache_enabled'] = True self.config['cache_location'] = cache - self.urlopener = urllib2.build_opener( - CacheHandler(self.config['cache_location']) - ) - - elif isinstance(cache, urllib2.OpenerDirector): - # If passed something from urllib2.build_opener, use that - log().debug("Using %r as urlopener" % cache) - self.config['cache_enabled'] = True - self.urlopener = cache else: raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache))) @@ -501,9 +490,11 @@ class Tvdb: self.config['base_url'] = "http://thetvdb.com" if self.config['search_all_languages']: - self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php?seriesname=%%s&language=all" % self.config + self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php" % self.config + self.config['params_getSeries'] = {"seriesname": "", "language": "all"} else: - self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php?seriesname=%%s&language=%(language)s" % self.config + self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php" % self.config + self.config['params_getSeries'] = {"seriesname": "", "language": ""} self.config['url_epInfo'] = u"%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.xml" % self.config self.config['url_epInfo_zip'] = u"%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.zip" % self.config @@ -529,78 +520,29 @@ class Tvdb: return os.path.join(tempfile.gettempdir(), "tvdb_api-%s" % (uid)) - def retry(ExceptionToCheck, default=None, tries=4, delay=3, backoff=2, logger=None): - """Retry calling the decorated function using an exponential backoff. - - http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ - original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry - - :param ExceptionToCheck: the exception to check. 
may be a tuple of - excpetions to check - :type ExceptionToCheck: Exception or tuple - :param tries: number of times to try (not retry) before giving up - :type tries: int - :param delay: initial delay between retries in seconds - :type delay: int - :param backoff: backoff multiplier e.g. value of 2 will double the delay - each retry - :type backoff: int - :param logger: logger to use. If None, print - :type logger: logging.Logger instance - """ - def deco_retry(f): - def f_retry(*args, **kwargs): - mtries, mdelay = tries, delay - try_one_last_time = True - while mtries > 1: - try: - print args,kwargs - return f(*args, **kwargs) - try_one_last_time = False - break - except ExceptionToCheck, e: - msg = "%s, Retrying in %d seconds..." % (str(e), mdelay) - if logger: - logger.warning(msg) - else: - print msg - time.sleep(mdelay) - mtries -= 1 - mdelay *= backoff - if try_one_last_time: - try: - return f(*args, **kwargs) - except ExceptionToCheck, e: - return default - return - return f_retry # true decorator - return deco_retry - - @retry(urllib2.URLError, tries=4, delay=3, backoff=2) - def _loadUrl(self, url, recache = False, language=None): + def _loadUrl(self, url, params=None, language=None): global lastTimeout try: log().debug("Retrieving URL %s" % url) - resp = self.urlopener.open(url) - if 'x-local-cache' in resp.headers: - log().debug("URL %s was cached in %s" % ( - url, - resp.headers['x-local-cache']) - ) - if recache: - log().debug("Attempting to recache %s" % url) - resp.recache() - except (IOError, urllib2.URLError), errormsg: - if not str(errormsg).startswith('HTTP Error'): - lastTimeout = datetime.datetime.now() - raise tvdb_error("Could not connect to server: %s" % (errormsg)) - + # cacheControl + if self.config['cache_enabled']: + sess = CacheControl(requests.Session(), cache=FileCache(self.config['cache_location'])) + else: + sess = requests.Session() + + # get response from TVRage + resp = sess.get(url, params=params) + except Exception, e: + if not str(e).startswith('HTTP Error'): + lastTimeout = datetime.datetime.now() + raise tvdb_error("Could not connect to server: %s" % (e)) + # handle gzipped content, # http://dbr.lighthouseapp.com/projects/13342/tickets/72-gzipped-data-patch if 'gzip' in resp.headers.get("Content-Encoding", ''): if gzip: - stream = StringIO.StringIO(resp.read()) + stream = StringIO.StringIO(resp.content) gz = gzip.GzipFile(fileobj=stream) return gz.read() @@ -611,26 +553,24 @@ class Tvdb: # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20] log().debug("We recived a zip file unpacking now ...") zipdata = StringIO.StringIO() - zipdata.write(resp.read()) + zipdata.write(resp.content) myzipfile = zipfile.ZipFile(zipdata) return myzipfile.read('%s.xml' % language) except zipfile.BadZipfile: - if 'x-local-cache' in resp.headers: - resp.delete_cache() raise tvdb_error("Bad zip file received from thetvdb.com, could not read it") - return resp.read() + return resp.content - def _getetsrc(self, url, language=None): + def _getetsrc(self, url, params=None, language=None): """Loads a URL using caching, returns an ElementTree of the source """ - src = self._loadUrl(url, language=language) + src = self._loadUrl(url, params=params, language=language) try: # TVDB doesn't sanitize \r (CR) from user input in some fields, # remove it to avoid errors. 
Change from SickBeard, from will14m return ElementTree.fromstring(src.rstrip("\r")) except SyntaxError: - src = self._loadUrl(url, recache=True, language=language) + src = self._loadUrl(url, params=params, language=language) try: return ElementTree.fromstring(src.rstrip("\r")) except SyntaxError, exceptionmsg: @@ -694,7 +634,8 @@ class Tvdb: """ series = urllib.quote(series.encode("utf-8")) log().debug("Searching for show %s" % series) - seriesEt = self._getetsrc(self.config['url_getSeries'] % (series)) + self.config['params_getSeries']['seriesname'] = series + seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']) allSeries = [] for series in seriesEt: result = dict((k.tag.lower(), k.text) for k in series.getchildren()) diff --git a/lib/tvrage_api/tvrage_api.py b/lib/tvrage_api/tvrage_api.py index 79d2bad8..9eed27a0 100644 --- a/lib/tvrage_api/tvrage_api.py +++ b/lib/tvrage_api/tvrage_api.py @@ -16,7 +16,6 @@ import os import re import time import urllib -import urllib2 import getpass import tempfile import warnings @@ -28,10 +27,12 @@ try: except ImportError: import xml.etree.ElementTree as ElementTree -from lib import requests from lib.dateutil.parser import parse -from tvrage_cache import CacheHandler +from lib import requests +from lib.cachecontrol.wrapper import CacheControl +from lib.cachecontrol.caches.file_cache import FileCache + from tvrage_ui import BaseUI from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound, tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound) @@ -276,26 +277,13 @@ class TVRage: if cache is True: self.config['cache_enabled'] = True self.config['cache_location'] = self._getTempDir() - self.urlopener = urllib2.build_opener( - CacheHandler(self.config['cache_location']) - ) elif cache is False: self.config['cache_enabled'] = False - self.urlopener = urllib2.build_opener() # default opener with no caching elif isinstance(cache, basestring): self.config['cache_enabled'] = True self.config['cache_location'] = cache - self.urlopener = urllib2.build_opener( - CacheHandler(self.config['cache_location']) - ) - - elif isinstance(cache, urllib2.OpenerDirector): - # If passed something from urllib2.build_opener, use that - log().debug("Using %r as urlopener" % cache) - self.config['cache_enabled'] = True - self.urlopener = cache else: raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache))) @@ -336,13 +324,17 @@ class TVRage: # The following url_ configs are based of the # http://tvrage.com/wiki/index.php/Programmers_API + self.config['base_url'] = "http://services.tvrage.com" - self.config['url_getSeries'] = u"%(base_url)s/myfeeds/search.php?key=%(apikey)s&show=%%s" % self.config + self.config['url_getSeries'] = u"%(base_url)s/myfeeds/search.php" % self.config + self.config['params_getSeries'] = {"key": self.config['apikey'], "show": ""} - self.config['url_epInfo'] = u"%(base_url)s/myfeeds/episode_list.php?key=%(apikey)s&sid=%%s" % self.config + self.config['url_epInfo'] = u"%(base_url)s/myfeeds/episode_list.php" % self.config + self.config['params_epInfo'] = {"key": self.config['apikey'], "sid": ""} - self.config['url_seriesInfo'] = u"%(base_url)s/myfeeds/showinfo.php?key=%(apikey)s&sid=%%s" % self.config + self.config['url_seriesInfo'] = u"%(base_url)s/myfeeds/showinfo.php" % self.config + self.config['params_seriesInfo'] = {"key": self.config['apikey'], "sid": ""} def _getTempDir(self): """Returns the [system temp dir]/tvrage_api-u501 (or @@ -359,76 +351,27 @@
class TVRage: return os.path.join(tempfile.gettempdir(), "tvrage_api-%s" % (uid)) - def retry(ExceptionToCheck, default=None, tries=4, delay=3, backoff=2, logger=None): - """Retry calling the decorated function using an exponential backoff. - - http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ - original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry - - :param ExceptionToCheck: the exception to check. may be a tuple of - excpetions to check - :type ExceptionToCheck: Exception or tuple - :param tries: number of times to try (not retry) before giving up - :type tries: int - :param delay: initial delay between retries in seconds - :type delay: int - :param backoff: backoff multiplier e.g. value of 2 will double the delay - each retry - :type backoff: int - :param logger: logger to use. If None, print - :type logger: logging.Logger instance - """ - def deco_retry(f): - def f_retry(*args, **kwargs): - mtries, mdelay = tries, delay - try_one_last_time = True - while mtries > 1: - try: - print args,kwargs - return f(*args, **kwargs) - try_one_last_time = False - break - except ExceptionToCheck, e: - msg = "%s, Retrying in %d seconds..." % (str(e), mdelay) - if logger: - logger.warning(msg) - else: - print msg - time.sleep(mdelay) - mtries -= 1 - mdelay *= backoff - if try_one_last_time: - try: - return f(*args, **kwargs) - except ExceptionToCheck, e: - return default - return - return f_retry # true decorator - return deco_retry - - @retry(urllib2.URLError, tries=4, delay=3, backoff=2) - def _loadUrl(self, url, recache = False): + def _loadUrl(self, url, params=None): global lastTimeout try: log().debug("Retrieving URL %s" % url) - #resp = self.urlopener.open(url) - resp = requests.get(url) - if 'x-local-cache' in resp.headers: - log().debug("URL %s was cached in %s" % ( - url, - resp.headers['x-local-cache']) - ) - if recache: - log().debug("Attempting to recache %s" % url) - resp.recache() - except (IOError, urllib2.URLError), errormsg: - if not str(errormsg).startswith('HTTP Error'): + + # cacheControl + if self.config['cache_enabled']: + sess = CacheControl(requests.Session(), cache=FileCache(self.config['cache_location'])) + else: + sess = requests.Session() + + # get response from TVRage + resp = sess.get(url, params=params) + except Exception, e: + if not str(e).startswith('HTTP Error'): lastTimeout = dt.datetime.now() - raise tvrage_error("Could not connect to server: %s" % (errormsg)) + raise tvrage_error("Could not connect to server: %s" % (e)) return resp.content - def _getetsrc(self, url): + def _getetsrc(self, url, params=None): """Loads a URL using caching, returns an ElementTree of the source """ reDict = { @@ -449,7 +392,7 @@ class TVRage: } robj = re.compile('|'.join(reDict.keys())) - src = self._loadUrl(url) + src = self._loadUrl(url, params) try: # TVRAGE doesn't sanitize \r (CR) from user input in some fields, # remove it to avoid errors. 
Change from SickBeard, from will14m @@ -459,24 +402,30 @@ class TVRage: elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag) if elm.tag in 'firstaired': - fixDate = parse(elm.text) - value = fixDate.strftime("%Y-%m-%d") - - elm.text = value + if elm.text == "0000-00-00": + elm.text = str(dt.date.fromordinal(1)) + try: + fixDate = parse(elm.text, fuzzy=True) + elm.text = fixDate.strftime("%Y-%m-%d") + except: + pass return ElementTree.fromstring(ElementTree.tostring(xml)) except SyntaxError: - src = self._loadUrl(url, recache=True) + src = self._loadUrl(url, params) try: xml = ElementTree.fromstring(src.rstrip("\r")) tree = ElementTree.ElementTree(xml) for elm in tree.iter(): elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag) - if elm.tag in 'firstaired': - fixDate = parse(elm.text) - value = fixDate.strftime("%Y-%m-%d") - - elm.text = value + if elm.tag in 'firstaired' and elm.text: + if elm.text == "0000-00-00": + elm.text = str(dt.date.fromordinal(1)) + try: + fixDate = parse(elm.text, fuzzy=True) + elm.text = fixDate.strftime("%Y-%m-%d") + except: + pass return ElementTree.fromstring(ElementTree.tostring(xml)) except SyntaxError, exceptionmsg: errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % ( @@ -538,7 +487,8 @@ class TVRage: """ series = urllib.quote(series.encode("utf-8")) log().debug("Searching for show %s" % series) - seriesEt = self._getetsrc(self.config['url_getSeries'] % (series)) + self.config['params_getSeries']['show'] = series + seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']) allSeries = [] seriesResult = {} for series in seriesEt: @@ -580,8 +530,10 @@ class TVRage: # Parse show information log().debug('Getting all series data for %s' % (sid)) + self.config['params_seriesInfo']['sid'] = sid seriesInfoEt = self._getetsrc( - self.config['url_seriesInfo'] % (sid) + self.config['url_seriesInfo'], + self.config['params_seriesInfo'] ) for curInfo in seriesInfoEt: @@ -610,8 +562,8 @@ class TVRage: # Parse episode data log().debug('Getting all episodes of %s' % (sid)) - url = self.config['url_epInfo'] % (sid) - epsEt = self._getetsrc(url) + self.config['params_epInfo']['sid'] = sid + epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo']) for cur_list in epsEt.findall("Episodelist"): for cur_seas in cur_list: try: diff --git a/sickbeard/indexers/generic.py b/sickbeard/indexers/generic.py index 531d625a..353d4e0c 100644 --- a/sickbeard/indexers/generic.py +++ b/sickbeard/indexers/generic.py @@ -15,6 +15,9 @@ # # You should have received a copy of the GNU General Public License # along with Sick Beard. If not, see .
+import os + +import sickbeard class GenericIndexer(object): def __init__(self, indexer): @@ -23,10 +26,10 @@ class GenericIndexer(object): INDEXER_TVDB = 'Tvdb' INDEXER_TVRAGE = 'TVRage' - INDEXER_NAME = {} - INDEXER_NAME[INDEXER_NONE] = '' - INDEXER_NAME[INDEXER_TVDB] = 'theTVDB' - INDEXER_NAME[INDEXER_TVRAGE] = 'TVRage' + INDEXERS = {} + INDEXERS[INDEXER_NONE] = '' + INDEXERS[INDEXER_TVDB] = 'theTVDB' + INDEXERS[INDEXER_TVRAGE] = 'TVRage' INDEXER_API_KEY = {} INDEXER_API_KEY[INDEXER_NONE] = '' @@ -57,6 +60,8 @@ class GenericIndexer(object): 'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11, 'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30} - self.config['base_url'] = INDEXER_BASEURL[indexer] - self.config['api_parms'] = INDEXER_API_PARMS[indexer] - self.config['name'] = INDEXER_NAME[indexer] \ No newline at end of file + self.base_url = INDEXER_BASEURL[indexer] + self.api_parms = INDEXER_API_PARMS[indexer] + self.indexerName = INDEXERS[indexer] + self.cache = os.path.join(sickbeard.CACHE_DIR, indexer) + self.indexers = [indexer for indexer in INDEXERS] \ No newline at end of file diff --git a/sickbeard/indexers/indexer_api.py b/sickbeard/indexers/indexer_api.py index 77597b8c..6f47f71a 100644 --- a/sickbeard/indexers/indexer_api.py +++ b/sickbeard/indexers/indexer_api.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU General Public License # along with Sick Beard. If not, see . import os +import datetime import sickbeard import generic @@ -25,19 +26,18 @@ from lib.tvdb_api.tvdb_api import Tvdb from lib.tvrage_api.tvrage_api import TVRage class indexerApi(generic.GenericIndexer): - def __init__(self, *args, **kwargs): - indexer = kwargs.pop('indexer',None) + def __init__(self, indexer=None, *args, **kwargs): super(indexerApi, self).__init__(indexer) - self.name = self.config['name'] + self.name = self.indexerName if indexer: - self.config['api_parms'].update(**kwargs) + self.api_parms.update(**kwargs) if sickbeard.CACHE_DIR: - self.config['api_parms']['cache'] = os.path.join(sickbeard.CACHE_DIR, indexer) + self.api_parms['cache'] = self.cache # wrap the indexer API object and return it back - self._wrapped = eval(indexer)(*args, **self.config['api_parms']) + self._wrapped = eval(indexer)(*args, **self.api_parms) def __getattr__(self, attr): return getattr(self._wrapped, attr) diff --git a/sickbeard/indexers/indexer_config.py b/sickbeard/indexers/indexer_config.py deleted file mode 100644 index 3db1b41a..00000000 --- a/sickbeard/indexers/indexer_config.py +++ /dev/null @@ -1,29 +0,0 @@ -INDEXER_TVDB = 'Tvdb' -INDEXER_TVRAGE = 'TVRage' - -INDEXER_API_KEY = {} -INDEXER_API_KEY[INDEXER_TVDB] = '9DAF49C96CBF8DAC' -INDEXER_API_KEY[INDEXER_TVRAGE] = 'Uhewg1Rr0o62fvZvUIZt' - -INDEXER_BASEURL = {} -INDEXER_BASEURL[INDEXER_TVDB] = 'http://thetvdb.com/api/' + INDEXER_API_KEY[INDEXER_TVDB] -INDEXER_BASEURL[INDEXER_TVRAGE] = 'http://tvrage.com/feeds/' + INDEXER_API_KEY[INDEXER_TVRAGE] - -INDEXER_API_PARMS = {} -INDEXER_API_PARMS[INDEXER_TVDB] = {'apikey': INDEXER_API_KEY[INDEXER_TVDB], - 'language': 'en', - 'useZip': True} - -INDEXER_API_PARMS[INDEXER_TVRAGE] = {'apikey': INDEXER_API_KEY[INDEXER_TVRAGE], - 'language': 'en'} - - -INDEXER_CONFIG = {} -INDEXER_CONFIG['valid_languages'] = [ - "da", "fi", "nl", "de", "it", "es", "fr","pl", "hu","el","tr", - "ru","he","ja","pt","zh","cs","sl", "hr","ko","en","sv","no"] - -INDEXER_CONFIG['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27, -'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9, -'tr': 
21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11, -'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30} \ No newline at end of file diff --git a/sickbeard/indexers/test/test.py b/sickbeard/indexers/test/test.py index e73c5bbb..75bc58c2 100644 --- a/sickbeard/indexers/test/test.py +++ b/sickbeard/indexers/test/test.py @@ -2,30 +2,19 @@ from __future__ import with_statement import unittest -import sqlite3 - import sys import os.path sys.path.append(os.path.abspath('..')) -sys.path.append(os.path.abspath('../lib')) +sys.path.append(os.path.abspath('../../../lib')) -import sickbeard -import shutil - -from sickbeard import encodingKludge as ek, providers, tvcache -from sickbeard import db -from sickbeard.databases import mainDB -from sickbeard.databases import cache_db - - -from indexer_api import indexerApi -from indexer_exceptions import indexer_exception +from sickbeard.indexers.indexer_api import indexerApi +from sickbeard.indexers.indexer_exceptions import indexer_exception class APICheck(unittest.TestCase): - indexer_id = 258171 - indexer = 'Tvdb' + indexer_id = 'Continum' + indexer = 'TVRage' # Set our common indexer_api options here - INDEXER_API_PARMS = {'apikey': '9DAF49C96CBF8DAC', + INDEXER_API_PARMS = {'apikey': 'Uhewg1Rr0o62fvZvUIZt', 'language': 'en', 'useZip': True} @@ -34,11 +23,9 @@ class APICheck(unittest.TestCase): lindexer_api_parms = INDEXER_API_PARMS.copy() try: - imdbid = " " - showurl = indexerApi(**lindexer_api_parms).config['base_url'] + indexer_id + '/all/en.zip' - t = indexerApi().config['valid_languages'] - t = indexerApi(**lindexer_api_parms) - myEp = t[258171] +# showurl = indexerApi(**lindexer_api_parms).config['base_url'] + str(indexer_id) + '/all/en.zip' + t = indexerApi(cache=True, **lindexer_api_parms) + myEp = t[indexer_id] if getattr(myEp, 'seriesname', None) is not None: print "FOUND" diff --git a/sickbeard/metadata/xbmc_12plus.py b/sickbeard/metadata/xbmc_12plus.py index d0b4d5da..063251ed 100644 --- a/sickbeard/metadata/xbmc_12plus.py +++ b/sickbeard/metadata/xbmc_12plus.py @@ -153,7 +153,7 @@ class XBMC_12PlusMetadata(generic.GenericMetadata): episodeguideurl = etree.SubElement(episodeguide, "url") episodeguideurl2 = etree.SubElement(tv_node, "episodeguideurl") if getattr(myShow, 'id', None) is not None: - showurl = t.config['base_url'] + myShow["id"] + '/all/en.zip' + showurl = t.base_url + myShow["id"] + '/all/en.zip' episodeguideurl.text = showurl episodeguideurl2.text = showurl diff --git a/sickbeard/showUpdater.py b/sickbeard/showUpdater.py index 44994ee6..d2fa2f87 100644 --- a/sickbeard/showUpdater.py +++ b/sickbeard/showUpdater.py @@ -28,6 +28,7 @@ from sickbeard.exceptions import ex from sickbeard import encodingKludge as ek from sickbeard import db +from indexers.indexer_api import indexerApi class ShowUpdater(): @@ -39,7 +40,7 @@ class ShowUpdater(): # update at 3 AM run_updater_time = datetime.time(hour=3) - update_datetime = datetime.datetime.today() + update_datetime = datetime.datetime.today() update_date = update_datetime.date() logger.log(u"Checking update interval", logger.DEBUG) @@ -54,29 +55,30 @@ class ShowUpdater(): # clean out cache directory, remove everything > 12 hours old if sickbeard.CACHE_DIR: - cache_dir = sickbeard.INDEXER_API_PARMS['cache'] - logger.log(u"Trying to clean cache folder " + cache_dir) + for indexer in indexerApi().indexers: + cache_dir = indexerApi(indexer=indexer).cache + logger.log(u"Trying to clean cache folder " + cache_dir) - # Does our cache_dir exists - if not ek.ek(os.path.isdir,
cache_dir): - logger.log(u"Can't clean " + cache_dir + " if it doesn't exist", logger.WARNING) - else: - max_age = datetime.timedelta(hours=12) - # Get all our cache files - cache_files = ek.ek(os.listdir, cache_dir) + # Does our cache_dir exists + if not ek.ek(os.path.isdir, cache_dir): + logger.log(u"Can't clean " + cache_dir + " if it doesn't exist", logger.WARNING) + else: + max_age = datetime.timedelta(hours=12) + # Get all our cache files + cache_files = ek.ek(os.listdir, cache_dir) - for cache_file in cache_files: - cache_file_path = ek.ek(os.path.join, cache_dir, cache_file) + for cache_file in cache_files: + cache_file_path = ek.ek(os.path.join, cache_dir, cache_file) - if ek.ek(os.path.isfile, cache_file_path): - cache_file_modified = datetime.datetime.fromtimestamp(ek.ek(os.path.getmtime, cache_file_path)) + if ek.ek(os.path.isfile, cache_file_path): + cache_file_modified = datetime.datetime.fromtimestamp(ek.ek(os.path.getmtime, cache_file_path)) - if update_datetime - cache_file_modified > max_age: - try: - ek.ek(os.remove, cache_file_path) - except OSError, e: - logger.log(u"Unable to clean " + cache_dir + ": " + repr(e) + " / " + str(e), logger.WARNING) - break + if update_datetime - cache_file_modified > max_age: + try: + ek.ek(os.remove, cache_file_path) + except OSError, e: + logger.log(u"Unable to clean " + cache_dir + ": " + repr(e) + " / " + str(e), logger.WARNING) + break # select 10 'Ended' tv_shows updated more than 90 days ago to include in this update stale_should_update = [] diff --git a/sickbeard/show_queue.py b/sickbeard/show_queue.py index 375b839a..2c7f798b 100644 --- a/sickbeard/show_queue.py +++ b/sickbeard/show_queue.py @@ -451,7 +451,7 @@ class QueueItemUpdate(ShowQueueItem): logger.log(u"Retrieving show info from " + self.show.indexer + "", logger.DEBUG) try: - self.show.loadFromIndexer(cache=not self.force) + self.show.loadFromIndexer(cache=self.force) except indexer_exceptions.indexer_error, e: logger.log(u"Unable to contact " + self.show.indexer + ", aborting: " + ex(e), logger.WARNING) return
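
For reference, the caching pattern that replaces the old urllib2 CacheHandler openers in tvdb_api and tvrage_api comes down to wrapping a requests session with CacheControl and a file-backed cache, then passing query parameters as a dict. The sketch below is illustrative only: the cache directory, the example URL and the query values are placeholder assumptions, and the imports use the top-level cachecontrol package rather than the bundled lib. namespace.

import requests
from cachecontrol.wrapper import CacheControl
from cachecontrol.caches.file_cache import FileCache  # requires the lockfile package

# Placeholder cache location; the real code derives it from sickbeard.CACHE_DIR
# or a per-API temp directory.
cache_dir = '/tmp/indexer_api_cache'

# Wrap a plain session so GET responses are transparently cached on disk.
sess = CacheControl(requests.Session(), cache=FileCache(cache_dir))

# Query parameters are passed as a dict instead of being interpolated into the
# URL string, matching the new params_getSeries handling.
resp = sess.get('http://thetvdb.com/api/GetSeries.php',
                params={'seriesname': 'Continuum', 'language': 'en'})
print(resp.status_code)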
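
The two hunks that set OP_NO_COMPRESSION (in contrib/pyopenssl.py and util.py) follow the same idea: refuse TLS-level compression so the CRIME attack has nothing to exploit. A minimal standalone sketch, assuming a Python build where ssl.SSLContext is available (2.7.9+ or 3.2+):

import ssl

# The flag urllib3 hardcodes for ssl modules that predate ssl.OP_NO_COMPRESSION.
OP_NO_COMPRESSION = 0x20000

context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
# Disable TLS compression during the handshake; prefer the stdlib constant
# when it exists, otherwise fall back to the raw flag value.
context.options |= getattr(ssl, 'OP_NO_COMPRESSION', OP_NO_COMPRESSION)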
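
showUpdater's cleanup now walks one cache directory per indexer and removes files older than twelve hours. Stripped of the encodingKludge wrappers and logging, the core of that sweep looks roughly like the sketch below; the function name and directory argument are illustrative, and errors are skipped per file rather than aborting the loop.

import datetime
import os

def prune_cache(cache_dir, max_age=datetime.timedelta(hours=12)):
    # Nothing to do if the directory has not been created yet.
    if not os.path.isdir(cache_dir):
        return
    now = datetime.datetime.today()
    for name in os.listdir(cache_dir):
        path = os.path.join(cache_dir, name)
        if not os.path.isfile(path):
            continue
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        if now - modified > max_age:
            try:
                os.remove(path)
            except OSError:
                pass  # a locked or already-removed file should not stop the sweep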