diff options
author | Camil Staps | 2016-03-05 10:25:03 +0100 |
---|---|---|
committer | Camil Staps | 2016-03-05 10:25:03 +0100 |
commit | 804ae3b864e1fe47ea38ac1a2283019387c33ac0 (patch) | |
tree | 043122a7497dd162b1bcecc548eb723307d99f5e | |
parent | Project 1: Config system; ETags; error pages (diff) |
Step 11: content encoding (only gzip for the moment)
-rw-r--r-- | project1/proj1_s4498062/webhttp/composer.py | 48 | ||||
-rw-r--r-- | project1/proj1_s4498062/webhttp/config.py | 7 | ||||
-rw-r--r-- | project1/proj1_s4498062/webhttp/encodings.py | 58 | ||||
-rw-r--r-- | project1/proj1_s4498062/webhttp/message.py | 41 | ||||
-rw-r--r-- | project1/proj1_s4498062/webhttp/regexes.py | 5 | ||||
-rw-r--r-- | project1/proj1_s4498062/webhttp/resource.py | 11 | ||||
-rw-r--r-- | project1/proj1_s4498062/webtests.py | 7 |
7 files changed, 154 insertions, 23 deletions
diff --git a/project1/proj1_s4498062/webhttp/composer.py b/project1/proj1_s4498062/webhttp/composer.py index f9bc0e0..1c61628 100644 --- a/project1/proj1_s4498062/webhttp/composer.py +++ b/project1/proj1_s4498062/webhttp/composer.py @@ -8,6 +8,7 @@ import re import time
from config import config
+import encodings
from message import Response
from resource import Resource, FileExistError, FileAccessError
import weblogging as logging
@@ -44,34 +45,47 @@ class ResponseComposer: return self.serve(request.uri, request=request)
- def serve(self, uri, code=200, etag=None, request=None, error_page=False):
- response = Response()
+ def serve(self, uri, code=200, etag=None, request=None):
+ resp = Response()
try:
resource = Resource(uri)
req = request
if req != None and (
resource.etag_match(req.get_header('If-None-Match')) or \
not resource.etag_match(req.get_header('If-Match') or '*')):
- response.code = 304
+ resp.code = 304
else:
- response.code = code
- response.body = resource.get_content()
- response.set_header('ETag', resource.generate_etag())
- response.set_header('Connection', 'close')
- response.set_header('Content-Type', resource.get_content_type())
- response.set_content_length()
+ resp.code = code
+ resp.body = None
+ encs = req.encodings() if req != None else [encodings.IDENTITY]
+ for enc in encs:
+ try:
+ resp.body = resource.get_content(enc)
+ resp.set_header('Content-Encoding', encodings.str(enc))
+ break
+ except UnknownEncodingError:
+ pass
+ if resp.body == None:
+ return self.serve_error(406)
+ resp.set_header('ETag', resource.generate_etag())
+ resp.set_header('Connection', 'close')
+ resp.set_header('Content-Type', resource.get_content_type())
+ resp.set_content_length()
except FileExistError:
- if not error_page:
- return self.serve(config('error404'), code=404, error_page=True)
+ if code < 400:
+ return self.serve_error(404)
else:
- response.code = code
- response.set_header('Connection', 'close')
- response.body = 'Error %d' % code
- response.set_content_length()
+ resp.code = code
+ resp.set_header('Connection', 'close')
+ resp.body = 'Error %d' % code
+ resp.set_content_length()
except FileAccessError:
- return self.serve(config('error403'), code=403, error_page=True)
+ return self.serve_error(403)
+
+ return resp
- return response
+ def serve_error(self, code):
+ return self.serve(config('error%d' % code, default=None), code=code)
def make_date_string(self):
"""Make string of date and time
diff --git a/project1/proj1_s4498062/webhttp/config.py b/project1/proj1_s4498062/webhttp/config.py index 35d73cb..5ed2009 100644 --- a/project1/proj1_s4498062/webhttp/config.py +++ b/project1/proj1_s4498062/webhttp/config.py @@ -4,9 +4,12 @@ __all__ = ['config'] scp = SafeConfigParser() -def config(option=None, section='webhttp', type=lambda x:x): +def config(option=None, section='webhttp', default=None, type=lambda x:x): if option == None: return scp else: - return type(scp.get(section, option)) + try: + return type(scp.get(section, option)) + except: + return default diff --git a/project1/proj1_s4498062/webhttp/encodings.py b/project1/proj1_s4498062/webhttp/encodings.py new file mode 100644 index 0000000..1a16be2 --- /dev/null +++ b/project1/proj1_s4498062/webhttp/encodings.py @@ -0,0 +1,58 @@ +import gzip +import StringIO + +IDENTITY = 0 +GZIP = 1 + +all = [IDENTITY, GZIP] + + +class UnknownEncodingError(Exception): + """Exception which is raised when an unknown encoding has been requested""" + pass + +def get(encoding): + if encoding in all: + return encoding + elif encoding.lower() == 'identity': + return IDENTITY + elif encoding.lower() == 'gzip': + return GZIP + else: + raise UnknownEncodingError + +def str(encoding): + encoding = get(encoding) + if encoding == IDENTITY: + return 'identity' + elif encoding == GZIP: + return 'gzip' + else: + raise UnknownEncodingError + +def encode(encoding, data): + encoding = get(encoding) + if encoding == IDENTITY: + return data + elif encoding == GZIP: + zbuf = StringIO.StringIO() + zfile = gzip.GzipFile(None, 'wb', 9, zbuf) + zfile.write(data) + zfile.close() + return zbuf.getvalue() + else: + raise UnknownEncodingError + +def decode(encoding, data): + encoding = get(encoding) + if encoding == IDENTITY: + return data + elif encoding == GZIP: + zbuf = StringIO.StringIO(data) + zfile = gzip.GzipFile(None, 'r', 9, zbuf) + data = zfile.read() + zfile.close() + return data + else: + raise UnknownEncodingError + diff --git 
a/project1/proj1_s4498062/webhttp/message.py b/project1/proj1_s4498062/webhttp/message.py index 059c765..2e1a962 100644 --- a/project1/proj1_s4498062/webhttp/message.py +++ b/project1/proj1_s4498062/webhttp/message.py @@ -5,6 +5,7 @@ This module contains classes for representing HTTP responses and requests. import re
+import encodings
import regexes as r
import weblogging as logging
@@ -101,8 +102,7 @@ class Message(object): msg = ''
msg += '%s\r\n' % self.startline()
msg += '\r\n'.join([k + ": " + v for k, v in self.headers.iteritems()])
- msg += '\r\n\r\n'
- msg += self.body
+ msg += '\r\n\r\n' + self.body
return msg
def __eq__(self, other):
@@ -126,6 +126,38 @@ class Request(Message): [headers, body] = rest.split('\r\n\r\n', 1)
self.parse_headers(headers)
self.body = body
+
+ def encodings(self):
+ requested = self.get_header('Accept-Encoding')
+ if requested == None:
+ return encodings.all
+
+ encs = []
+ requested = re.split(r.EncodingSplit, requested)
+ for value in requested:
+ try:
+ match = re.match(r.AcceptEncodingValue, value)
+ if match == None:
+ return [encodings.IDENTITY]
+ enc, q = match.groups()
+ # Unclear what should happen when some qvalues are omitted
+ q = q or '0.001'
+ if enc == '*':
+ for enc in encodings.all:
+ encs.append((encodings.get(enc), float(q)))
+ else:
+ encs.append((encodings.get(enc), float(q)))
+ except encodings.UnknownEncodingError:
+ pass
+
+ rejected = [e[0] for e in encs if e[1] == 0]
+ accepted = [e for e in encs if e[1] != 0]
+
+ if not encodings.IDENTITY in rejected + [a[0] for a in accepted]:
+ accepted.append((encodings.IDENTITY, 0.001))
+
+ accepted.sort(key=lambda x:x[1], reverse=True)
+ return [a[0] for a in accepted]
def startline(self):
return "%s %s %s" % (self.method, self.uri, self.version)
@@ -145,6 +177,7 @@ class Response(Message): def __init__(self):
"""Initialize the Response"""
super(Response, self).__init__()
+ self.set_header('Server', 'WebPy')
def parse(self, msg):
[respline, rest] = msg.split('\r\n', 1)
@@ -161,6 +194,10 @@ class Response(Message): def set_content_length(self):
self.set_header('Content-Length', len(self.body))
+
+ def decompress(self):
+ self.body = encodings.decode(
+ self.get_header('Content-Encoding'), self.body)
def __str__(self):
"""Convert the Response to a string
diff --git a/project1/proj1_s4498062/webhttp/regexes.py b/project1/proj1_s4498062/webhttp/regexes.py index b1cacc9..1ec1f3f 100644 --- a/project1/proj1_s4498062/webhttp/regexes.py +++ b/project1/proj1_s4498062/webhttp/regexes.py @@ -131,6 +131,7 @@ token = r'[^\x00-\x1f\(\)<>@,;:\\"\/\[\]?=\{\} \t]+' qdtext = r'^\x00-\x08\x0b-\x0c\x0e-\x1f\x7f"]' quotedPair = r'\\[\x00-\x7f]' quotedString = grp(r'"' + regex_opt_r([qdtext, quotedPair]) + r'*"') +qvalue = regex_opt_r([r'0(?:\.\d{0,3})?', r'1(?:\.0{0,3})?']) HTTPVersion = r'HTTP\/\d\.\d' Method = regex_opt(['OPTIONS', 'GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE', @@ -150,4 +151,8 @@ FieldValue = grp(regex_opt_r([grp(FieldContent), LWS]) + r'*') MessageHeader = grp(grpm(FieldName) + r':' + grpm(FieldValue)) ETagSplit = grp(r',' + LWS + r'*') +EncodingSplit = ETagSplit +contentCoding = token +coding = regex_opt_r([contentCoding, r'\*']) +AcceptEncodingValue = grp(grpm(coding) + grp(r';q=' + grpm(qvalue)) + r'?') diff --git a/project1/proj1_s4498062/webhttp/resource.py b/project1/proj1_s4498062/webhttp/resource.py index 8fefc9c..9dd948c 100644 --- a/project1/proj1_s4498062/webhttp/resource.py +++ b/project1/proj1_s4498062/webhttp/resource.py @@ -4,15 +4,19 @@ This module contains a handler class for resources. """
import binascii
+import gzip
import hashlib
import mimetypes
import os
import re
+import StringIO
import urlparse
from config import config
+import encodings
import regexes as r
+
class FileExistError(Exception):
"""Exception which is raised when file does not exist"""
pass
@@ -36,6 +40,8 @@ class Resource: Args:
uri (str): Uniform Resource Identifier
"""
+ if uri == None:
+ raise FileExistError
self.uri = uri
out = urlparse.urlparse(uri)
self.path = os.path.join("content", out.path.lstrip("/"))
@@ -69,13 +75,14 @@ class Resource: return etag == '*' or \
any([tag == my_etag for tag in re.split(r.ETagSplit, etag)])
- def get_content(self):
+ def get_content(self, encoding=encodings.IDENTITY):
"""Get the contents of the resource
Returns:
str: Contents of the resource
"""
- return open(self.path).read()
+ content = open(self.path).read()
+ return encodings.encode(encoding, content)
def get_content_type(self):
"""Get the content type, e.g. "text/html"
diff --git a/project1/proj1_s4498062/webtests.py b/project1/proj1_s4498062/webtests.py index 5e45acf..b0ff889 100644 --- a/project1/proj1_s4498062/webtests.py +++ b/project1/proj1_s4498062/webtests.py @@ -114,6 +114,13 @@ class TestGetRequests(unittest.TestCase): """GET which requests an existing resource using gzip encoding, which
is accepted by the server.
"""
+ r1 = self.request('GET', '/test', self.default_headers + \
+ [('Accept-Encoding', 'gzip;q=1, identity;q=0')])
+ self.assertEqual(r1.get_header('Content-Encoding'), 'gzip')
+ r1.decompress()
+ r2 = self.request('GET', '/test', self.default_headers + \
+ [('Accept-Encoding', '')])
+ self.assertEqual(r1.body, r2.body)
pass
def test_doubledot(self):
|