# ---------------------------------------------------------------------------
# webhttp/composer.py -- methods of ResponseComposer
# ---------------------------------------------------------------------------

def _plain_error(resp, code):
    """Turn `resp` into a minimal plain-text error response for `code`."""
    resp.code = code
    resp.set_header('Connection', 'close')
    resp.body = 'Error %d' % code
    resp.set_content_length()


def serve(self, uri, code=200, etag=None, request=None):
    """Build the Response for the resource identified by `uri`.

    Args:
        uri (str): URI of the resource to serve; None is treated by
            Resource as a missing file.
        code (int): status code to use when the resource can be served.
            Values >= 400 mean an error page is being served.
        etag: unused; kept so existing callers keep working.
        request: the Request being answered, or None (e.g. for error
            pages). Used for conditional headers and Accept-Encoding.

    Returns:
        Response: 304 when the request's conditional headers say the client
        copy is current, the (possibly encoded) resource otherwise, or an
        error response (403, 404, 406).
    """
    resp = Response()

    try:
        resource = Resource(uri)
        if request is not None and (
                resource.etag_match(request.get_header('If-None-Match')) or
                not resource.etag_match(
                    request.get_header('If-Match') or '*')):
            resp.code = 304
        else:
            resp.code = code
            resp.body = None
            if request is not None:
                encs = request.encodings()
            else:
                encs = [encodings.IDENTITY]
            # Use the first coding (in order of client preference) that the
            # server is able to produce.
            for enc in encs:
                try:
                    resp.body = resource.get_content(enc)
                    resp.set_header('Content-Encoding', encodings.str(enc))
                    break
                # The exception lives in the encodings module; the bare name
                # is not imported here and would raise NameError.
                except encodings.UnknownEncodingError:
                    pass
            if resp.body is None:
                # None of the acceptable codings is available.
                return self.serve_error(406)
            resp.set_header('ETag', resource.generate_etag())
            resp.set_header('Connection', 'close')
            resp.set_header('Content-Type', resource.get_content_type())
            resp.set_content_length()
    except FileExistError:
        if code < 400:
            return self.serve_error(404)
        # The error page itself is missing: fall back to a plain body.
        _plain_error(resp, code)
    except FileAccessError:
        if code < 400:
            return self.serve_error(403)
        # The error page itself is unreadable; recursing into
        # serve_error(403) again would never terminate.
        _plain_error(resp, code)

    return resp


def serve_error(self, code):
    """Serve the configured error page for `code`.

    The page is looked up under the config option 'error<code>'; when it is
    not configured, serve() receives None and falls back to a plain body.
    """
    return self.serve(config('error%d' % code, default=None), code=code)


# ---------------------------------------------------------------------------
# webhttp/config.py
# ---------------------------------------------------------------------------

def config(option=None, section='webhttp', default=None, type=lambda x:x):
    """Read a configuration value.

    Args:
        option (str): option name; when None the parser object itself is
            returned.
        section (str): section to read the option from.
        default: value returned when the option cannot be read or converted.
        type (callable): converter applied to the raw string value.

    Returns:
        The converted option value, `default` on failure, or the parser.
    """
    if option is None:
        return scp
    try:
        return type(scp.get(section, option))
    except Exception:
        # Missing section/option or failed conversion. Unlike a bare
        # `except:`, this lets KeyboardInterrupt and SystemExit propagate.
        return default
"""Content codings supported by the server (currently identity and gzip)."""

import gzip
import io

IDENTITY = 0
GZIP = 1

# Every supported coding. The name shadows the builtin, but it is part of this
# module's public interface (`encodings.all`), so it is kept.
all = [IDENTITY, GZIP]

_BY_NAME = {'identity': IDENTITY, 'gzip': GZIP}
_NAMES = {IDENTITY: 'identity', GZIP: 'gzip'}


class UnknownEncodingError(Exception):
    """Exception which is raised when an unknown encoding has been requested"""
    pass


def get(encoding):
    """Normalise `encoding` to one of the coding constants.

    Args:
        encoding: a coding constant (IDENTITY, GZIP) or a case-insensitive
            coding name ('identity', 'gzip').

    Returns:
        The matching coding constant.

    Raises:
        UnknownEncodingError: for anything else, including None and
            non-string values.
    """
    if encoding in all:
        return encoding
    try:
        name = encoding.lower()
    except AttributeError:
        # Not a string (e.g. None for a missing header, or an unknown int).
        raise UnknownEncodingError(encoding)
    try:
        return _BY_NAME[name]
    except KeyError:
        raise UnknownEncodingError(encoding)


def str(encoding):
    """Return the HTTP token ('identity' or 'gzip') for `encoding`.

    Raises:
        UnknownEncodingError: if `encoding` is not a supported coding.
    """
    return _NAMES[get(encoding)]


def encode(encoding, data):
    """Apply the content coding `encoding` to the byte string `data`.

    Raises:
        UnknownEncodingError: if `encoding` is not a supported coding.
    """
    encoding = get(encoding)
    if encoding == IDENTITY:
        return data
    elif encoding == GZIP:
        zbuf = io.BytesIO()
        zfile = gzip.GzipFile(fileobj=zbuf, mode='wb', compresslevel=9)
        try:
            zfile.write(data)
        finally:
            # Closing writes the gzip trailer; must happen before getvalue().
            zfile.close()
        return zbuf.getvalue()
    else:
        raise UnknownEncodingError(encoding)


def decode(encoding, data):
    """Undo the content coding `encoding` on the byte string `data`.

    Raises:
        UnknownEncodingError: if `encoding` is not a supported coding.
    """
    encoding = get(encoding)
    if encoding == IDENTITY:
        return data
    elif encoding == GZIP:
        zfile = gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb')
        try:
            return zfile.read()
        finally:
            zfile.close()
    else:
        raise UnknownEncodingError(encoding)
# ---------------------------------------------------------------------------
# webhttp/message.py -- method of Request
# ---------------------------------------------------------------------------

def encodings(self):
    """Return the content codings the client accepts, most preferred first.

    Parses the Accept-Encoding header of this request.

    Returns:
        list: coding constants from the `encodings` module, ordered by
        descending qvalue (ties keep header order). Without the header every
        supported coding is acceptable. A malformed header yields
        [encodings.IDENTITY].
    """
    requested = self.get_header('Accept-Encoding')
    if requested is None:
        # Copy so callers cannot mutate the module-level list.
        return list(encodings.all)

    order = []          # codings in order of first mention
    qvalues = {}        # coding -> qvalue
    wildcard_q = None   # qvalue attached to '*', if present

    for value in re.split(r.EncodingSplit, requested):
        match = re.match(r.AcceptEncodingValue, value)
        if match is None:
            return [encodings.IDENTITY]
        name, q = match.groups()
        # An omitted qvalue means full preference (weight 1).
        q = float(q) if q else 1.0
        if name == '*':
            wildcard_q = q
            continue
        try:
            coding = encodings.get(name)
        except encodings.UnknownEncodingError:
            # Codings the server does not know are simply ignored.
            continue
        if coding not in qvalues:
            order.append(coding)
            qvalues[coding] = q

    # '*' only covers codings that were not mentioned explicitly, so it must
    # not resurrect a coding that was refused with q=0.
    if wildcard_q is not None:
        for coding in encodings.all:
            if coding not in qvalues:
                order.append(coding)
                qvalues[coding] = wildcard_q

    # Identity stays acceptable (as a last resort) unless it was refused,
    # either explicitly or through '*;q=0'.
    if encodings.IDENTITY not in qvalues:
        order.append(encodings.IDENTITY)
        qvalues[encodings.IDENTITY] = 0.001

    accepted = [coding for coding in order if qvalues[coding] != 0]
    # list.sort is stable, so equal qvalues keep their header order.
    accepted.sort(key=lambda coding: qvalues[coding], reverse=True)
    return accepted


# ---------------------------------------------------------------------------
# webhttp/message.py -- method of Response
# ---------------------------------------------------------------------------

def decompress(self):
    """Decode the body according to this response's Content-Encoding.

    A response without a Content-Encoding header is left untouched.
    """
    coding = self.get_header('Content-Encoding')
    if coding is None:
        return
    self.body = encodings.decode(coding, self.body)
a/project1/proj1_s4498062/webhttp/regexes.py b/project1/proj1_s4498062/webhttp/regexes.py index b1cacc9..1ec1f3f 100644 --- a/project1/proj1_s4498062/webhttp/regexes.py +++ b/project1/proj1_s4498062/webhttp/regexes.py @@ -131,6 +131,7 @@ token = r'[^\x00-\x1f\(\)<>@,;:\\"\/\[\]?=\{\} \t]+' qdtext = r'^\x00-\x08\x0b-\x0c\x0e-\x1f\x7f"]' quotedPair = r'\\[\x00-\x7f]' quotedString = grp(r'"' + regex_opt_r([qdtext, quotedPair]) + r'*"') +qvalue = regex_opt_r([r'0(?:\.\d{0,3})?', r'1(?:\.0{0,3})?']) HTTPVersion = r'HTTP\/\d\.\d' Method = regex_opt(['OPTIONS', 'GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE', @@ -150,4 +151,8 @@ FieldValue = grp(regex_opt_r([grp(FieldContent), LWS]) + r'*') MessageHeader = grp(grpm(FieldName) + r':' + grpm(FieldValue)) ETagSplit = grp(r',' + LWS + r'*') +EncodingSplit = ETagSplit +contentCoding = token +coding = regex_opt_r([contentCoding, r'\*']) +AcceptEncodingValue = grp(grpm(coding) + grp(r';q=' + grpm(qvalue)) + r'?') diff --git a/project1/proj1_s4498062/webhttp/resource.py b/project1/proj1_s4498062/webhttp/resource.py index 8fefc9c..9dd948c 100644 --- a/project1/proj1_s4498062/webhttp/resource.py +++ b/project1/proj1_s4498062/webhttp/resource.py @@ -4,15 +4,19 @@ This module contains a handler class for resources. 
# ---------------------------------------------------------------------------
# webhttp/resource.py -- method of Resource
# ---------------------------------------------------------------------------

def get_content(self, encoding=encodings.IDENTITY):
    """Get the contents of the resource

    Args:
        encoding: content coding to apply to the contents, as accepted by
            encodings.encode (defaults to no coding).

    Returns:
        str: Contents of the resource, encoded with `encoding`
    """
    # Binary mode: the bytes are sent (or gzip-compressed) as-is, and the
    # with-block closes the file handle, which the original left open.
    with open(self.path, 'rb') as handle:
        content = handle.read()
    return encodings.encode(encoding, content)