1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00
wget/testenv/HTTPServer.py
Darshit Shah 7cbe8141d1 Introducing Python based Test Environment
Squashed Commit, of the following commits:

7743384 Update documentation to reflect changes in code
b703633 Add feature that allows to ensure that Wget correctly crawls the website in recursive mode
0758f47 Add new test for recursive spider mode
43bb61b Smartly guess content type header
d4d0e63 Support substring replace in File Contents too
f578500 Compatibility fix with multiple servers
8b1a9b6 Extend Functionality to support spawning multiple servers
e84192a Use the provided calls to shutdown server instead of rewriting it
99659f3 Improve Documentation
cb94e52 Slight code cleanup. Remove unused code
886ac1a Shift to new Threading Model from Multiprocessing model
e74c2ec Add new test for POST Requests
48644f1 Print diff when file contents don't match
b6f9efe Add tests for Cookie support
4c9e6b4 Document pending work
e13bc90 Add new test to ensure Content Disposition and Auth work together
60d1f4d Add new Test for Continue command
738b299 Add test, Test-Head
9b9d16b Edit non-unique TEST_NAME variable
ae958db Minor optimizations to the way Server Rules are executed
50b4f0c The rules need not be a defaultdict.
dccc154 Introducing Python based Test Environment
2014-07-24 16:51:58 +05:30

431 lines
16 KiB
Python

from http.server import HTTPServer, BaseHTTPRequestHandler
from posixpath import basename, splitext
from base64 import b64encode
from random import random
from hashlib import md5
import threading
import re
class InvalidRangeHeader(Exception):
    """ Create an Exception for handling of invalid Range Headers.

    The reason is available as ``err_message`` and through ``str()``.
    """

    # TODO: Eliminate this exception and use only ServerError
    def __init__(self, err_message):
        # Hand the message to Exception as well, so that args and str()
        # carry it even when the exception is built with a keyword argument.
        super().__init__(err_message)
        self.err_message = err_message
class ServerError(Exception):
    """ Exception carrying a human-readable reason in ``err_message``.

    The same text is also passed to Exception, so ``str(error)`` and
    ``error.args`` expose it.
    """

    def __init__(self, err_message):
        # Initialise the base class too; otherwise keyword construction
        # leaves args empty and str() returns an empty string.
        super().__init__(err_message)
        self.err_message = err_message
class StoppableHTTPServer(HTTPServer):
    """ HTTPServer that carries the rules, files and request log of a test.

    Define methods for configuring the Server.
    """

    # Class-level default kept so that StoppableHTTPServer.request_headers
    # still resolves; every instance gets its own list in __init__.
    request_headers = list()

    def __init__(self, *args, **kwargs):
        """ Create the server; all arguments are forwarded to HTTPServer. """
        # A list defined only on the class would be shared by every server,
        # mixing the request logs when several servers are spawned.
        self.request_headers = []
        super().__init__(*args, **kwargs)

    def server_conf(self, filelist, conf_dict):
        """ Set Server Rules and File System for this instance. """
        self.server_configs = conf_dict
        self.fileSys = filelist

    def get_req_headers(self):
        """ Return the list of request lines logged on this server. """
        return self.request_headers
class WgetHTTPRequestHandler(BaseHTTPRequestHandler):
    """ Define methods for handling Test Checks. """

    def get_rule_list(self, name):
        """ Return the rule object stored under `name`, or None.

        None is also returned when no rules are attached to the handler,
        i.e. when ``self.rules`` is missing, None or empty.
        """
        # The request methods assign self.rules from a dict lookup that may
        # yield None; a membership test on None would raise TypeError.
        rules = getattr(self, "rules", None)
        if not rules:
            return None
        return rules.get(name)
class _Handler (WgetHTTPRequestHandler):
""" Define Handler Methods for different Requests. """
InvalidRangeHeader = InvalidRangeHeader
protocol_version = 'HTTP/1.1'
""" Define functions for various HTTP Requests. """
def do_HEAD(self):
    """ Answer a HEAD request through the code path shared with GET. """
    # The (content, offset) pair returned by send_head is not needed here
    # because this method writes no body.
    self.send_head("HEAD")
def do_GET(self):
    """ Answer a GET request: headers via send_head, then the body. """
    content, start = self.send_head("GET")
    if not content:
        # send_head returned no (or empty) content, so there is no body
        # to write.
        return
    payload = content.encode('utf-8')
    # A non-None start is an offset into the encoded body
    self.wfile.write(payload if start is None else payload[start:])
def do_POST(self):
    """ Handle POST: append the request body to an existing file.

    When the target path is not in the server's file system the request is
    delegated to send_put, which stores the body as a new file. A 'Response'
    rule configured for the path short-circuits everything; in that case
    (None, None) is returned.
    """
    path = self.path[1:]
    # A path without configured rules yields None; normalise it to an empty
    # mapping so that the rule lookups do not fail on it.
    self.rules = self.server.server_configs.get(path) or {}
    if not self.custom_response():
        return (None, None)
    if path not in self.server.fileSys:
        self.send_put(path)
        return
    body_data = self.get_body_data()
    self.send_response(200)
    self.send_header("Content-type", "text/plain")
    content = self.server.fileSys[path] + "\n" + body_data
    self.server.fileSys[path] = content
    payload = content.encode('utf-8')
    # Content-Length counts the bytes written below, not the characters
    self.send_header("Content-Length", len(payload))
    self.finish_headers()
    try:
        self.wfile.write(payload)
    except OSError:
        # Best effort: a socket-level write failure is ignored.
        pass
def do_PUT(self):
    """ Handle PUT: replace (or create) the file at the requested path.

    A 'Response' rule configured for the path short-circuits the upload;
    in that case (None, None) is returned.
    """
    path = self.path[1:]
    # Uploading to a path that has no rules configured must not crash the
    # rule lookup, so a missing entry is treated as "no rules".
    self.rules = self.server.server_configs.get(path) or {}
    if not self.custom_response():
        return (None, None)
    # Drop any previous content; send_put stores the new body
    self.server.fileSys.pop(path, None)
    self.send_put(path)
""" End of HTTP Request Method Handlers. """
""" Helper functions for the Handlers. """
def parse_range_header(self, header_line, length):
    """ Return the start offset requested by a Range header.

    header_line is the raw header value, or None when the request had no
    Range header (None is then returned). length is the size of the
    resource the offset must fall into.

    Raises InvalidRangeHeader with the message "Range Overflow" when the
    offset is not smaller than length, and with a "Cannot parse ..."
    message for every value that is not of the form "bytes=<start>-".
    """
    if header_line is None:
        return None
    # Only the open-ended single range is understood. Requiring at least
    # one digit and checking the match result turns values such as
    # "bytes=-" or "bytes=0-99" into InvalidRangeHeader instead of an
    # unrelated ValueError / AttributeError.
    match = re.match(r"^bytes=(\d+)-$", header_line)
    if match is None:
        # The exception class is reached through the attribute of the same
        # name that the handler class defines.
        raise self.InvalidRangeHeader("Cannot parse header Range: %s" %
                                      (header_line))
    range_start = int(match.group(1))
    if range_start >= length:
        raise self.InvalidRangeHeader("Range Overflow")
    return range_start
def get_body_data(self):
    """ Read the request body and return it decoded as UTF-8 text. """
    announced = self.headers.get("Content-Length")
    if announced is None:
        # No Content-Length header: read zero bytes
        size = 0
    else:
        size = int(announced)
    raw = self.rfile.read(size)
    return raw.decode('utf-8')
def send_put(self, path):
    """ Store the request body under `path` and echo it back with a 201. """
    body_data = self.get_body_data()
    self.send_response(201)
    self.server.fileSys[path] = body_data
    payload = body_data.encode('utf-8')
    self.send_header("Content-type", "text/plain")
    # Content-Length counts the bytes written below, not the characters
    self.send_header("Content-Length", len(payload))
    self.finish_headers()
    try:
        self.wfile.write(payload)
    except OSError:
        # Best effort: a socket-level write failure is ignored.
        pass
def SendHeader(self, header_obj):
    """ Rule hook for 'SendHeader' rules; intentionally does nothing.

    send_head looks up a method named after every configured rule, so this
    name has to exist. The headers themselves are written by
    send_cust_headers.
    """
def send_cust_headers(self):
    """ Send every header configured by a 'SendHeader' rule, if any. """
    header_obj = self.get_rule_list('SendHeader')
    if not header_obj:
        return
    custom = header_obj.headers
    for name in custom:
        self.send_header(name, custom[name])
def finish_headers(self):
    """ Send the custom headers, then close the header section. """
    # Custom headers have to be sent before end_headers() is called
    self.send_cust_headers()
    self.end_headers()
def Response(self, resp_obj):
    """ Rule hook: answer with the configured status code, then abort.

    Always raises ServerError once the status line and headers are sent.
    """
    status = resp_obj.response_code
    self.send_response(status)
    self.finish_headers()
    raise ServerError("Custom Response code sent.")
def custom_response(self):
    """ Send the status configured by a 'Response' rule, if there is one.

    Returns False when such a response was sent and True when there is no
    'Response' rule.
    """
    codes = self.get_rule_list('Response')
    if not codes:
        return True
    self.send_response(codes.response_code)
    self.finish_headers()
    return False
def base64(self, data):
    """ Return the Base64 encoding of the text `data` as a str. """
    raw = data.encode('utf-8')
    return b64encode(raw).decode('utf-8')
def send_challenge(self, auth_type):
    """ Send the WWW-Authenticate header(s) for `auth_type`.

    Supported values: "Basic", "Digest", "Both" (two separate headers,
    Digest first) and "Both_inline" (both challenges in a single header).
    A Digest challenge stores fresh ``nonce`` and ``opaque`` values on the
    handler; authorize_Digest compares the client's reply against them.

    Raises ValueError for any other auth_type.
    """
    if auth_type == "Both":
        self.send_challenge("Digest")
        self.send_challenge("Basic")
        return
    if auth_type == "Basic":
        challenge_str = 'Basic realm="Wget-Test"'
    elif auth_type in ("Digest", "Both_inline"):
        self.nonce = md5(str(random()).encode('utf-8')).hexdigest()
        self.opaque = md5(str(random()).encode('utf-8')).hexdigest()
        challenge_str = 'Digest realm="Test", nonce="%s", opaque="%s"' % (
            self.nonce,
            self.opaque)
        challenge_str += ', qop="auth"'
        if auth_type == "Both_inline":
            challenge_str = 'Basic realm="Wget-Test", ' + challenge_str
    else:
        # Fail with a clear message rather than an UnboundLocalError on
        # challenge_str below.
        raise ValueError("Unsupported authentication type: %r" % (auth_type,))
    self.send_header("WWW-Authenticate", challenge_str)
def authorize_Basic(self, auth_header, auth_rule):
    """ Check an Authorization header against the rule's Basic credentials.

    Returns True only when the header equals "Basic " followed by the
    Base64 form of "user:password" taken from auth_rule.
    """
    if auth_header is None or auth_header.split(' ')[0] != 'Basic':
        return False
    self.user = auth_rule.auth_user
    self.passw = auth_rule.auth_pass
    expected = "Basic " + self.base64(self.user + ":" + self.passw)
    return expected == auth_header
def parse_auth_header(self, auth_header):
    """ Split a Digest Authorization header into a dict of its parameters.

    The leading "Digest " is dropped. Every ``key=value`` or
    ``key="value"`` pair becomes an entry, with the surrounding quotes
    removed. Text that is not part of such a pair is ignored.
    """
    n = len("Digest ")
    auth_header = auth_header[n:].strip()
    # Matching whole pairs (instead of splitting on ", ") copes with commas
    # that are not followed by a space and with commas or '=' signs inside
    # quoted values such as the uri.
    pair = r'([\w-]+)\s*=\s*(?:"([^"]*)"|([^\s,]*))'
    params = {}
    for match in re.finditer(pair, auth_header):
        key, quoted, bare = match.groups()
        params[key] = quoted if quoted is not None else bare
    return params
def KD(self, secret, data):
    """ Return H() of `secret` and `data` joined by a colon. """
    keyed = secret + ":" + data
    return self.H(keyed)
def H(self, data):
    """ Return the hexadecimal MD5 digest of the text `data`. """
    digest = md5(data.encode('utf-8'))
    return digest.hexdigest()
def A1(self):
    """ Return "user:Test:password" built from the stored credentials. """
    # "Test" is the realm announced in the Digest challenge
    return "%s:Test:%s" % (self.user, self.passw)
def A2(self, params):
    """ Return "<request method>:<uri>" for the parsed Digest `params`. """
    uri = params["uri"]
    return "%s:%s" % (self.command, uri)
def check_response(self, params):
    """ Recompute the Digest response and compare it with the client's.

    With a "qop" parameter the hashed data is nonce:nc:cnonce:qop:H(A2),
    otherwise nonce:H(A2). Returns True when KD(H(A1), data) equals
    params['response'].
    """
    if "qop" in params:
        parts = [params['nonce'], params['nc'],
                 params['cnonce'], params['qop']]
    else:
        parts = [params['nonce']]
    parts.append(self.H(self.A2(params)))
    data_str = ":".join(parts)
    expected = self.KD(self.H(self.A1()), data_str)
    return expected == params['response']
def authorize_Digest(self, auth_header, auth_rule):
    """ Check a Digest Authorization header against the rule's credentials.

    Returns True only when the header carries every required attribute,
    names the configured user, echoes the nonce and opaque values of the
    challenge issued on this handler, and contains the expected response
    hash. Any missing piece yields False rather than an exception.
    """
    if auth_header is None or auth_header.split(' ')[0] != 'Digest':
        return False
    self.user = auth_rule.auth_user
    self.passw = auth_rule.auth_pass
    params = self.parse_auth_header(auth_header)
    # Verify that the attributes exist before any of them is read
    req_attribs = ['username', 'realm', 'nonce', 'uri', 'response']
    if any(attrib not in params for attrib in req_attribs):
        return False
    # check_response needs these two as soon as qop is present
    if "qop" in params and ("nc" not in params or "cnonce" not in params):
        return False
    # nonce/opaque are only set once send_challenge has run on this handler;
    # a reply that arrives without a prior challenge cannot be valid.
    if self.user != params['username'] or \
       getattr(self, 'nonce', None) != params['nonce'] or \
       getattr(self, 'opaque', None) != params.get('opaque'):
        return False
    return True if self.check_response(params) else False
def authorize_Both(self, auth_header, auth_rule):
    """ Authorizer for the auth type "Both"; always refuses.

    handle_auth and is_authorized pick this method when the rule asks for
    "Both" and the request has no Authorization header.
    """
    return False
def authorize_Both_inline(self, auth_header, auth_rule):
    """ Authorizer for the auth type "Both_inline"; always refuses.

    handle_auth and is_authorized pick this method when the rule asks for
    "Both_inline" and the request has no Authorization header.
    """
    return False
def Authentication(self, auth_rule):
    """ Rule hook: enforce the authentication described by `auth_rule`.

    When handle_auth refuses the request, a 401 with the matching
    challenge is sent and a ServerError with the same text is raised.
    """
    try:
        self.handle_auth(auth_rule)
    except ServerError as se:
        reason = str(se)
        self.send_response(401, "Authorization Required")
        self.send_challenge(auth_rule.auth_type)
        self.finish_headers()
        raise ServerError(reason)
def handle_auth(self, auth_rule):
    """ Run the authorizer that matches the rule and the request.

    For the rule types "Both" and "Both_inline" the scheme named in the
    Authorization header selects the authorizer; otherwise the rule's own
    type does.

    Raises ServerError when the request is not authorized, which includes
    a client naming a scheme this server has no authorizer for. Raises
    AssertionError when the rule itself names an unknown type.
    """
    auth_header = self.headers.get("Authorization")
    required_auth = auth_rule.auth_type
    if required_auth in ("Both", "Both_inline") and auth_header:
        auth_type = auth_header.split(' ')[0]
    else:
        auth_type = required_auth
    authorize = getattr(self, "authorize_" + auth_type, None)
    if authorize is None:
        if auth_type != required_auth:
            # The scheme came from the client: that is a failed login, not
            # a server fault.
            raise ServerError("Unable to Authenticate")
        # Raised explicitly (not via an assert statement) so the check is
        # still performed when Python runs with -O.
        raise AssertionError("No authorizer for type " + auth_type)
    if authorize(auth_header, auth_rule) is False:
        raise ServerError("Unable to Authenticate")
def is_authorized(self):
    """ Check the request against the 'Authentication' rule, if any.

    Returns True when there is no such rule. Otherwise the result of the
    selected authorizer is returned, and a 401 with a challenge is sent
    when that result is False. A client that names a scheme without an
    authorizer is refused and challenged with the rule's own type.

    Raises AssertionError when the rule itself names an unknown type.
    """
    auth_rule = self.get_rule_list('Authentication')
    if not auth_rule:
        return True
    auth_header = self.headers.get("Authorization")
    req_auth = auth_rule.auth_type
    if req_auth in ("Both", "Both_inline") and auth_header:
        auth_type = auth_header.split(' ')[0]
    else:
        auth_type = req_auth
    authorize = getattr(self, "authorize_" + auth_type, None)
    if authorize is None:
        if auth_type == req_auth:
            # Raised explicitly (not via an assert statement) so the check
            # is still performed when Python runs with -O.
            raise AssertionError("No authorizer for type " + auth_type)
        # Unknown scheme supplied by the client: refuse, and challenge with
        # what the rule asks for, since send_challenge only knows the
        # server's own types.
        is_auth = False
        auth_type = req_auth
    else:
        is_auth = authorize(auth_header, auth_rule)
    if is_auth is False:
        self.send_response(401)
        self.send_challenge(auth_type)
        self.finish_headers()
    return is_auth
def ExpectHeader(self, header_obj):
    """ Rule hook: require every configured header with its exact value.

    On the first header that is missing or has a different value a 400
    error is sent and ServerError is raised.
    """
    exp_headers = header_obj.headers
    for header_line in exp_headers:
        header_recd = self.headers.get(header_line)
        if header_recd is None or header_recd != exp_headers[header_line]:
            # send_error() emits a complete response (status line, headers
            # and body), so no further header calls may follow it; they
            # would put stray bytes after the finished response.
            self.send_error(400, "Expected Header " + header_line + " not found")
            raise ServerError("Header " + header_line + " not found")
def expect_headers(self):
    """ Return True when every header of the 'ExpectHeader' rule matches.

    Without such a rule the result is True. On the first missing or
    different header a 400 error is sent and False is returned.

    This is modified code to handle a few changes. Should be removed ASAP
    """
    exp_headers_obj = self.get_rule_list('ExpectHeader')
    if exp_headers_obj:
        exp_headers = exp_headers_obj.headers
        for header_line in exp_headers:
            header_re = self.headers.get(header_line)
            if header_re is None or header_re != exp_headers[header_line]:
                # send_error() already ends the headers and writes the
                # body; another end_headers() would add a stray blank line
                # after the finished response.
                self.send_error(400, 'Expected Header not Found')
                return False
    return True
def RejectHeader(self, header_obj):
    """ Rule hook: refuse requests that carry a blacklisted header value.

    On the first configured header that is present with exactly the
    configured value a 400 error is sent and ServerError is raised.
    """
    rej_headers = header_obj.headers
    for header_line in rej_headers:
        header_recd = self.headers.get(header_line)
        if header_recd is not None and header_recd == rej_headers[header_line]:
            # send_error() emits a complete response (status line, headers
            # and body), so no further header calls may follow it; they
            # would put stray bytes after the finished response.
            self.send_error(400, 'Blacklisted Header ' + header_line + ' received')
            raise ServerError("Header " + header_line + ' received')
def reject_headers(self):
    """ Return False when the request carries a blacklisted header value.

    Without a 'RejectHeader' rule the result is True. On the first
    configured header that is present with exactly the configured value a
    400 error is sent and False is returned.
    """
    rej_headers = self.get_rule_list("RejectHeader")
    if rej_headers:
        rej_headers = rej_headers.headers
        for header_line in rej_headers:
            header_re = self.headers.get(header_line)
            if header_re is not None and header_re == rej_headers[header_line]:
                # send_error() already ends the headers and writes the
                # body; another end_headers() would add a stray blank line
                # after the finished response.
                self.send_error(400, 'Blacklisted Header was Sent')
                return False
    return True
def __log_request(self, method):
    """ Append "<method> <path>" to the server's list of seen requests. """
    entry = method + " " + self.path
    self.server.request_headers.append(entry)
def send_head(self, method):
    """ Common code for GET and HEAD Commands.
    This method is overridden to use the fileSys dict.

    The method variable contains whether this was a HEAD or a GET Request.
    According to RFC 2616, the server should not differentiate between
    the two requests, however, we use it here for a specific test.

    Returns (content, range_begin): the file content as text and the byte
    offset requested through a Range header (None without one). Returns
    (None, None) whenever a complete reply (404, 416, 500) was already
    sent or a rule aborted the request.
    """
    if self.path == "/":
        path = "index.html"
    else:
        path = self.path[1:]

    self.__log_request(method)

    if path not in self.server.fileSys:
        self.send_error(404, "Not Found")
        return (None, None)

    # A file without configured rules yields None; treat it as "no rules"
    self.rules = self.server.server_configs.get(path) or {}

    for rule_name in self.rules:
        # Each rule is handled by the method that carries its name
        rule_handler = getattr(self, rule_name, None)
        if rule_handler is None:
            self.send_error(500, "Method " + rule_name + " not defined")
            return (None, None)
        try:
            rule_handler(self.rules[rule_name])
        except AssertionError:
            # Rule handlers may themselves fail an assertion; this is
            # reported with the same 500 reply as a missing handler.
            msg = "Method " + rule_name + " not defined"
            self.send_error(500, msg)
            return (None, None)
        except ServerError as se:
            print(se.__str__())
            return (None, None)

    content = self.server.fileSys.get(path)
    # Range offsets and Content-Length are byte counts, and do_GET slices
    # the UTF-8 encoded body, so the length must be measured in bytes too.
    content_length = len(content.encode('utf-8'))
    try:
        self.range_begin = self.parse_range_header(
            self.headers.get("Range"), content_length)
    except InvalidRangeHeader as ae:
        # self.log_error("%s", ae.err_message)
        if ae.err_message == "Range Overflow":
            self.send_response(416)
            self.finish_headers()
            return (None, None)
        # Any other malformed Range header is ignored
        self.range_begin = None

    if self.range_begin is None:
        self.send_response(200)
    else:
        self.send_response(206)
        self.send_header("Accept-Ranges", "bytes")
        self.send_header("Content-Range",
                         "bytes %d-%d/%d" % (self.range_begin,
                                             content_length - 1,
                                             content_length))
        content_length -= self.range_begin
    cont_type = self.guess_type(path)
    self.send_header("Content-type", cont_type)
    self.send_header("Content-Length", content_length)
    self.finish_headers()
    return (content, self.range_begin)
def guess_type(self, path):
    """ Return the Content-Type for `path`, judged by its file extension.

    Known extensions are .txt, .css and .html (matched case-sensitively);
    everything else is served as "text/plain".
    """
    extension_map = {
        ".txt": "text/plain",
        ".css": "text/css",
        ".html": "text/html",
    }
    ext = splitext(basename("/" + path))[1]
    return extension_map.get(ext, "text/plain")
class HTTPd(threading.Thread):
    """ Thread that owns one server instance and runs its serve loop. """

    server_class = StoppableHTTPServer
    handler = _Handler

    def __init__(self, addr=None):
        """ Create the server on `addr`, ('localhost', 0) by default. """
        super().__init__()
        bind_to = ('localhost', 0) if addr is None else addr
        self.server_inst = self.server_class(bind_to, self.handler)
        # First two items of the socket's own address
        self.server_address = self.server_inst.socket.getsockname()[:2]

    def run(self):
        """ Thread body: serve requests via serve_forever(). """
        self.server_inst.serve_forever()

    def server_conf(self, file_list, server_rules):
        """ Forward the file list and the rules to the server instance. """
        self.server_inst.server_conf(file_list, server_rules)
# vim: set ts=8 sts=4 sw=3 tw=0 et :