Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00

Commit: Start work on HTTP responses

parent 48f548236e
commit 3ffc0f5e8c
@@ -14,6 +14,7 @@ from functools import partial
 
 from calibre import as_unicode
 from calibre.srv.errors import MaxSizeExceeded, NonHTTPConnRequest
+from calibre.srv.respond import finalize_output, generate_static_output
 from calibre.srv.utils import MultiDict
 
 HTTP1 = 'HTTP/1.0'
@@ -171,20 +172,20 @@ def http_communicate(conn):
         # Don't bother writing the 408 if the response
         # has already started being written.
         if pair and not pair.sent_headers:
-            pair.simple_response(httplib.REQUEST_TIMEOUT, "Request Timeout")
+            pair.simple_response(httplib.REQUEST_TIMEOUT)
     except NonHTTPConnRequest:
         raise
     except Exception:
-        conn.server_loop.log.exception('Error serving request:', pair.path if pair else None)
+        conn.server_loop.log.exception('Error serving request:', pair.repr_for_log() if pair else 'None')
         if pair and not pair.sent_headers:
-            pair.simple_response(httplib.INTERNAL_SERVER_ERROR, "Internal Server Error")
+            pair.simple_response(httplib.INTERNAL_SERVER_ERROR)
 
 class FixedSizeReader(object):
 
     def __init__(self, socket_file, content_length):
         self.socket_file, self.remaining = socket_file, content_length
 
-    def __call__(self, size=-1):
+    def read(self, size=-1):
         if size < 0:
             size = self.remaining
         size = min(self.remaining, size)
@@ -232,7 +233,7 @@ class ChunkedReader(object):
         else:
             self.rbuf.write(chunk[:-2])
 
-    def __call__(self, size=-1):
+    def read(self, size=-1):
         if size < 0:
             # Read all data
             while not self.finished:
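Renaming __call__() to read() on both body readers gives them the minimal file-like duck type, so anything that only needs .read(size) can consume the request body directly. A rough standalone sketch of that contract, not part of the commit (it assumes the readers live in calibre.srv.http, as the test import further down suggests, and uses a BytesIO stand-in for conn.socket_file):

    from io import BytesIO
    import shutil

    from calibre.srv.http import FixedSizeReader  # assumption: same module as the hunks above

    socket_file = BytesIO(b'x' * 100)           # stand-in for conn.socket_file
    reader = FixedSizeReader(socket_file, 100)  # content_length = 100

    buf = BytesIO()
    shutil.copyfileobj(reader, buf)   # copyfileobj only ever calls reader.read(size)
    # buf.getvalue() == b'x' * 100; a further reader.read() returns b'' (nothing remaining)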
@@ -266,6 +267,7 @@ class HTTPPair(object):
         self.inheaders = MultiDict()
         self.outheaders = MultiDict()
+        self.handle_request = handle_request
         self.request_line = None
         self.path = ()
         self.qs = MultiDict()
 
@@ -283,7 +285,7 @@ class HTTPPair(object):
         self.started_request = False
         self.reponse_protocol = HTTP1
 
-        self.status = b''
+        self.status_code = None
         self.sent_headers = False
 
         self.request_content_length = 0
@@ -296,7 +298,7 @@ class HTTPPair(object):
                 return
         except MaxSizeExceeded:
             self.simple_response(
-                httplib.REQUEST_URI_TOO_LONG, "Request-URI Too Long",
+                httplib.REQUEST_URI_TOO_LONG,
                 "The Request-URI sent with the request exceeds the maximum allowed bytes.")
             return
 
@@ -305,7 +307,7 @@ class HTTPPair(object):
                 return
         except MaxSizeExceeded:
             self.simple_response(
-                httplib.REQUEST_ENTITY_TOO_LARGE, "Request Entity Too Large",
+                httplib.REQUEST_ENTITY_TOO_LARGE,
                 "The headers sent with the request exceed the maximum allowed bytes.")
             return
 
@@ -331,33 +333,34 @@ class HTTPPair(object):
 
         if not request_line.endswith(b'\r\n'):
             self.simple_response(
-                httplib.BAD_REQUEST, 'Bad Request', "HTTP requires CRLF terminators")
+                httplib.BAD_REQUEST, "HTTP requires CRLF terminators")
             return False
 
         self.request_line = request_line
         try:
             method, uri, req_protocol = request_line.strip().split(b' ', 2)
             rp = int(req_protocol[5]), int(req_protocol[7])
             self.method = method.decode('ascii')
         except (ValueError, IndexError):
-            self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Malformed Request-Line")
+            self.simple_response(httplib.BAD_REQUEST, "Malformed Request-Line")
             return False
 
         try:
             self.request_protocol = protocol_map[rp]
         except KeyError:
-            self.simple_response(httplib.HTTP_VERSION_NOT_SUPPORTED, "HTTP Version Not Supported")
+            self.simple_response(httplib.HTTP_VERSION_NOT_SUPPORTED)
             return False
 
         scheme, authority, path = parse_request_uri(uri)
         if b'#' in path:
-            self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Illegal #fragment in Request-URI.")
+            self.simple_response(httplib.BAD_REQUEST, "Illegal #fragment in Request-URI.")
            return False
 
         if scheme:
             try:
                 self.scheme = scheme.decode('ascii')
             except ValueError:
-                self.simple_response(httplib.BAD_REQUEST, "Bad Request", 'Un-decodeable scheme')
+                self.simple_response(httplib.BAD_REQUEST, 'Un-decodeable scheme')
                 return False
 
         qs = b''
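For reference, the request line is split on the first two spaces and the protocol version is picked out positionally from the HTTP/x.y token; protocol_map (not shown in this hunk) then maps the resulting (major, minor) tuple to the module's protocol constants, and an unknown version is answered with 505. A standalone trace of that parsing, with illustrative values only:

    request_line = b'GET /books/1?fmt=epub HTTP/1.1\r\n'
    method, uri, req_protocol = request_line.strip().split(b' ', 2)
    # method == b'GET', uri == b'/books/1?fmt=epub', req_protocol == b'HTTP/1.1'
    rp = int(req_protocol[5]), int(req_protocol[7])
    # rp == (1, 1): characters 5 and 7 of b'HTTP/1.1' are the version digits,
    # so a malformed token raises ValueError/IndexError and becomes a 400 response.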
@@ -366,14 +369,14 @@ class HTTPPair(object):
         try:
             self.qs = MultiDict.create_from_query_string(qs)
         except Exception:
-            self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Malformed Request-Line",
+            self.simple_response(httplib.BAD_REQUEST, "Malformed Request-Line",
                                  'Unparseable query string')
             return False
 
         try:
             path = '%2F'.join(unquote(x).decode('utf-8') for x in quoted_slash.split(path))
         except ValueError as e:
-            self.simple_response(httplib.BAD_REQUEST, "Bad Request", as_unicode(e))
+            self.simple_response(httplib.BAD_REQUEST, as_unicode(e))
             return False
         self.path = tuple(x.replace('%2F', '/') for x in path.split('/'))
 
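The two-step path handling above keeps an escaped slash (%2F) inside a single path component while still percent-decoding everything else: the raw path is split on %2F, each piece is unquoted, the pieces are rejoined with a literal '%2F' marker, and only after splitting on real slashes is the marker turned back into '/'. A standalone illustration, where the quoted_slash regex is an assumption about the pattern the module defines:

    import re
    from urllib import unquote  # Python 2 helper the method relies on

    quoted_slash = re.compile(br'%2[fF]')  # assumed equivalent of the module-level regex

    raw = b'/a%2Fb/c%20d'
    path = '%2F'.join(unquote(x).decode('utf-8') for x in quoted_slash.split(raw))
    # path == '/a%2Fb/c d'  (the %20 is decoded, the escaped slash survives as a marker)
    parts = tuple(x.replace('%2F', '/') for x in path.split('/'))
    # parts == ('', 'a/b', 'c d')  -- 'a/b' stays one component instead of two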
@@ -387,12 +390,12 @@ class HTTPPair(object):
             self.inheaders = read_headers(partial(self.conn.socket_file.readline, maxsize=self.max_header_line_size))
             self.request_content_length = int(self.inheaders.get('Content-Length', 0))
         except ValueError as e:
-            self.simple_response(httplib.BAD_REQUEST, "Bad Request", as_unicode(e))
+            self.simple_response(httplib.BAD_REQUEST, as_unicode(e))
             return False
 
         if self.request_content_length > self.server_loop.max_request_body_size:
             self.simple_response(
-                httplib.REQUEST_ENTITY_TOO_LARGE, "Request Entity Too Large",
+                httplib.REQUEST_ENTITY_TOO_LARGE,
                 "The entity sent with the request exceeds the maximum "
                 "allowed bytes (%d)." % self.server_loop.max_request_body_size)
             return False
@@ -421,7 +424,7 @@ class HTTPPair(object):
                 else:
                     # Note that, even if we see "chunked", we must reject
                     # if there is an extension we don't recognize.
-                    self.simple_response(httplib.NOT_IMPLEMENTED, "Not Implemented", "Unknown transfer encoding: %s" % enc)
+                    self.simple_response(httplib.NOT_IMPLEMENTED, "Unknown transfer encoding: %r" % enc)
                     self.close_connection = True
                     return False
 
@@ -432,17 +435,17 @@ class HTTPPair(object):
         self.flushed_write(msg.encode('ascii'))
         return True
 
-    def simple_response(self, status_code, status_text, msg=""):
+    def simple_response(self, status_code, msg=""):
         abort = status_code in (httplib.REQUEST_ENTITY_TOO_LARGE, httplib.REQUEST_URI_TOO_LONG)
         if abort:
             self.close_connection = True
             if self.reponse_protocol is HTTP1:
                 # HTTP/1.0 has no 413/414 codes
-                status_code, status_text = 400, 'Bad Request'
+                status_code = httplib.BAD_REQUEST
 
         msg = msg.encode('utf-8')
         buf = [
-            '%s %d %s' % (self.reponse_protocol, status_code, status_text),
+            '%s %d %s' % (self.reponse_protocol, status_code, httplib.responses[status_code]),
             "Content-Length: %s" % len(msg),
             "Content-Type: text/plain; charset=UTF-8"
         ]
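Dropping the status_text parameter works because the stdlib already maps every status code to its canonical reason phrase via httplib.responses; simple_response() now looks the phrase up itself, and for HTTP/1.0 clients the 413/414 codes (which that protocol lacks) are downgraded to a plain 400. A small standalone check of that lookup, not part of the commit:

    import httplib

    for code in (httplib.REQUEST_TIMEOUT, httplib.NOT_IMPLEMENTED, httplib.REQUEST_ENTITY_TOO_LARGE):
        print('%d %s' % (code, httplib.responses[code]))
    # 408 Request Timeout
    # 501 Not Implemented
    # 413 Request Entity Too Large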
@@ -456,3 +459,34 @@ class HTTPPair(object):
     def flushed_write(self, data):
         self.conn.socket_file.write(data)
         self.conn.socket_file.flush()
+
+    def repr_for_log(self):
+        return 'HTTPPair: %r\nPath:%r\nQuery:\n%s\nIn Headers:\n%s\nOut Headers:\n%s' % (
+            self.request_line, self.path, self.qs.pretty('\t'), self.inheaders.pretty('\t'), self.outheaders.pretty('\t')
+        )
+
+    def generate_static_output(self, name, generator):
+        return generate_static_output(self.server_loop.gso_cache, self.server_loop.gso_lock, name, generator)
+
+    def response(self):
+        if self.chunked_read:
+            self.input_reader = ChunkedReader(self.conn.socket_file, self.server_loop.max_request_body_size)
+        else:
+            self.input_reader = FixedSizeReader(self.conn.socket_file, self.request_content_length)
+
+        output = self.handle_request(self)
+        if self.status_code is None:
+            raise Exception('Request handler did not set status_code')
+        # Read and discard any remaining body from the HTTP request
+        self.input_reader.read()
+
+        self.status_code, output = finalize_output(output, self.inheaders, self.outheaders, self.status_code)
+
+        self.send_headers()
+
+        if self.method != 'HEAD':
+            output.commit(self.conn.socket_file)
+        self.conn.socket_file.flush()
+
+    def send_headers(self):
+        self.sent_headers = True
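Taken together, the new response() method fixes the contract for the handle_request callable: it is invoked with the HTTPPair after the headers are parsed and the body reader is in place, it must set status_code, and whatever it returns (bytes or unicode, an open file, a cached StaticGeneratedOutput, or any other iterable) is normalised by finalize_output. A minimal hypothetical handler written against that contract, not part of the commit:

    import httplib

    def hello_handler(pair):
        # pair is the HTTPPair; leaving status_code unset would make response() raise.
        pair.status_code = httplib.OK
        pair.outheaders.set('Content-Type', 'text/plain; charset=UTF-8', replace=True)
        return 'Hello from %r\n' % (pair.path,)   # a unicode body becomes a DynamicOutput

    # Presumably wired in via the server loop, e.g. ServerLoop(http_handler=hello_handler).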
@@ -9,7 +9,7 @@ __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
 import socket, os, errno, ssl, time, sys
 from operator import and_
 from Queue import Queue, Full
-from threading import Thread, current_thread
+from threading import Thread, current_thread, Lock
 from io import DEFAULT_BUFFER_SIZE, BytesIO
 
 from calibre.srv.errors import NonHTTPConnRequest, MaxSizeExceeded
@@ -570,6 +570,7 @@ class ServerLoop(object):
         if http_handler is None and nonhttp_handler is None:
             raise ValueError('You must specify at least one protocol handler')
         self.log = log or ThreadSafeLog(level=ThreadSafeLog.DEBUG)
+        self.gso_cache, self.gso_lock = {}, Lock()
         self.allow_socket_preallocation = allow_socket_preallocation
         self.no_delay = no_delay
         self.request_queue_size = request_queue_size
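The gso_cache/gso_lock pair added to ServerLoop is the storage behind the new HTTPPair.generate_static_output() helper: the generator runs at most once per name, under the lock, and the cached StaticGeneratedOutput (with its precomputed ETag and length) is handed out to every later request. A hypothetical handler using it might look like this:

    import httplib

    def index_handler(pair):
        pair.status_code = httplib.OK
        pair.outheaders.set('Content-Type', 'text/html; charset=UTF-8', replace=True)
        # The lambda only runs on the first request for 'index-page'; afterwards the
        # StaticGeneratedOutput cached in server_loop.gso_cache is returned unchanged.
        return pair.generate_static_output(
            'index-page', lambda: '<h1>calibre content server</h1>'.encode('utf-8'))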
src/calibre/srv/respond.py (new file, 176 lines)
@@ -0,0 +1,176 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'

import os, hashlib, shutil, httplib, zlib, struct, time
from io import DEFAULT_BUFFER_SIZE, BytesIO

from calibre import force_unicode

def acceptable_encoding(val, allowed=frozenset({'gzip'})):
    def enc(x):
        e, r = x.partition(';')[::2]
        p, v = r.partition('=')[::2]
        q = 1.0
        if p == 'q' and v:
            try:
                q = float(v)
            except Exception:
                pass
        return e.lower(), q

    emap = dict(enc(x.strip()) for x in val.split(','))
    acceptable = sorted(set(emap) & allowed, key=emap.__getitem__, reverse=True)
    if acceptable:
        return acceptable[0]

def gzip_prefix(mtime):
    # See http://www.gzip.org/zlib/rfc-gzip.html
    return b''.join((
        b'\x1f\x8b',  # ID1 and ID2: gzip marker
        b'\x08',  # CM: compression method
        b'\x00',  # FLG: none set
        # MTIME: 4 bytes
        struct.pack(b"<L", int(mtime) & 0xFFFFFFFF),
        b'\x02',  # XFL: max compression, slowest algo
        b'\xff',  # OS: unknown
    ))

def write_chunked_data(dest, data):
    dest.write(('%X\r\n' % len(data)).encode('ascii'))
    dest.write(data)
    dest.write(b'\r\n')

def write_compressed_file_obj(input_file, dest, compress_level=6):
    crc = zlib.crc32(b"")
    size = 0
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    prefix_written = False
    while True:
        data = input_file.read(DEFAULT_BUFFER_SIZE)
        if not data:
            break
        size += len(data)
        crc = zlib.crc32(data, crc)
        data = zobj.compress(data)
        if not prefix_written:
            prefix_written = True
            data = gzip_prefix(time.time()) + data
        write_chunked_data(dest, data)
    data = zobj.flush() + struct.pack(b"<L", crc & 0xFFFFFFFF) + struct.pack(b"<L", size & 0xFFFFFFFF)
    write_chunked_data(dest, data)
    write_chunked_data(dest, b'')


class FileSystemOutputFile(object):

    def __init__(self, output, outheaders):
        self.output_file = output
        pos = output.tell()
        output.seek(0, os.SEEK_END)
        self.content_length = output.tell() - pos
        self.etag = hashlib.sha1(force_unicode(output.name or '') + str(os.fstat(output.fileno()).st_mtime)).hexdigest()
        output.seek(pos)
        self.accept_ranges = True

    def write(self, dest):
        shutil.copyfileobj(self.output_file, dest)
        self.output_file = None

    def write_compressed(self, dest):
        write_compressed_file_obj(self.output_file, dest)

class DynamicOutput(object):

    def __init__(self, output, outheaders):
        if isinstance(output, bytes):
            self.data = output
        else:
            self.data = output.encode('utf-8')
        ct = outheaders.get('Content-Type', 'text/plain')
        if 'charset=' not in ct:
            ct += '; charset=UTF-8'
        outheaders.set('Content-Type', ct, replace=True)
        self.content_length = len(self.data)
        self.etag = None
        self.accept_ranges = False

    def write(self, dest):
        dest.write(self.data)
        self.data = None

    def write_compressed(self, dest):
        write_compressed_file_obj(BytesIO(self.data), dest)

class GeneratedOutput(object):

    def __init__(self, output, outheaders):
        self.output = output
        self.content_length = self.etag = None
        self.accept_ranges = False

    def write(self, dest):
        for line in self.output:
            if line:
                write_chunked_data(dest, line)

class StaticGeneratedOutput(object):

    def __init__(self, data):
        self.data = data
        self.etag = hashlib.sha1(data).hexdigest()
        self.content_length = len(data)
        self.accept_ranges = False

    def write(self, dest):
        dest.write(self.data)

    def write_compressed(self, dest):
        write_compressed_file_obj(BytesIO(self.data), dest)

def generate_static_output(cache, gso_lock, name, generator):
    with gso_lock:
        ans = cache.get(name)
        if ans is None:
            ans = cache[name] = StaticGeneratedOutput(generator())
        return ans

def finalize_output(output, inheaders, outheaders, status_code):
    ct = outheaders.get('Content-Type', '')
    compressible = not ct or ct.startswith('text/') or ct.startswith('image/svg') or ct.startswith('application/json')
    if isinstance(output, file):
        output = FileSystemOutputFile(output, outheaders)
    elif isinstance(output, (bytes, type(''))):
        output = DynamicOutput(output, outheaders)
    elif isinstance(output, StaticGeneratedOutput):
        pass
    else:
        output = GeneratedOutput(output, outheaders)
    compressible = (status_code == httplib.OK and compressible and output.content_length > 1024 and
                    acceptable_encoding(inheaders.get('Accept-Encoding', '')))
    accept_ranges = not compressible and output.accept_ranges is not None and status_code == httplib.OK

    for header in 'Accept-Ranges Content-Encoding Transfer-Encoding ETag'.split():
        outheaders.pop(header, all=True)

    # TODO: If-None-Match, Ranges, If-Range

    if output.etag:
        outheaders.set('ETag', output.etag, replace=True)
    if accept_ranges:
        outheaders.set('Accept-Ranges', 'bytes', replace=True)
    elif compressible:
        outheaders.set('Content-Encoding', 'gzip', replace=True)

    if compressible or output.content_length is None:
        outheaders.set('Transfer-Encoding', 'chunked', replace=True)

    output.commit = output.write_compressed if compressible else output.write

    return status_code, output
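write_compressed_file_obj() produces one complete gzip member: the 10-byte header from gzip_prefix(), a raw deflate stream, then the CRC32 and uncompressed-size trailer, with every piece wrapped in HTTP chunked framing by write_chunked_data(). A standalone round trip of that framing using only the stdlib, not part of the commit (the small input deliberately fits in one read so a single data chunk is produced):

    import zlib
    from io import BytesIO

    from calibre.srv.respond import write_compressed_file_obj

    def read_chunked(stream):
        # Inverse of write_chunked_data(): hex length line, payload, CRLF, until a 0 chunk.
        body = b''
        while True:
            size = int(stream.readline().strip(), 16)
            chunk = stream.read(size)
            stream.read(2)                 # consume the CRLF after every chunk
            if size == 0:
                return body
            body += chunk

    payload = b'hello world ' * 200
    dest = BytesIO()
    write_compressed_file_obj(BytesIO(payload), dest)
    dest.seek(0)
    gz = read_chunked(dest)
    # 16 + MAX_WBITS tells zlib to expect (and verify) the gzip header and trailer.
    assert zlib.decompress(gz, 16 + zlib.MAX_WBITS) == payload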
@@ -45,3 +45,14 @@ class TestHTTP(BaseTest):
         read_headers(headers('Connection:a\n').readline)
         read_headers(headers(' Connection:a\n').readline)
 
+    def test_accept_encoding(self):
+        'Test parsing of Accept-Encoding'
+        from calibre.srv.http import acceptable_encoding
+        def test(name, val, ans, allowed={'gzip'}):
+            self.ae(acceptable_encoding(val, allowed), ans, name + ' failed')
+        test('Empty field', '', None)
+        test('Simple', 'gzip', 'gzip')
+        test('Case insensitive', 'GZIp', 'gzip')
+        test('Multiple', 'gzip, identity', 'gzip')
+        test('Priority', '1;q=0.5, 2;q=0.75, 3;q=1.0', '3', {'1', '2', '3'})
@@ -7,6 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
 
 from urlparse import parse_qs
+import repr as reprlib
 
 class MultiDict(dict):
 
@@ -67,3 +68,10 @@ class MultiDict(dict):
         if ans is default:
             return [] if all else default
         return ans if all else ans[-1]
+
+    def __repr__(self):
+        return '{' + ', '.join('%s: %s' % (reprlib.repr(k), reprlib.repr(v)) for k, v in self.iteritems()) + '}'
+    __str__ = __unicode__ = __repr__
+
+    def pretty(self, leading_whitespace=''):
+        return leading_whitespace + ('\n' + leading_whitespace).join('%s: %s' % (k, v) for k, v in self.items())
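The MultiDict additions are purely for logging: reprlib keeps __repr__ bounded and pretty() is what the new repr_for_log() in the HTTP code uses to dump the query string and headers one per line. For the multi-value semantics visible in the get() context lines above, a toy stand-in (not calibre's actual class) behaves like this:

    class ToyMultiDict(dict):
        # Values are stored as lists; get() mirrors the hunk above: the last value
        # wins, unless all=True asks for the whole list.
        def add(self, key, value):
            self.setdefault(key, []).append(value)

        def get(self, key, default=None, all=False):
            ans = dict.get(self, key, default)
            if ans is default:
                return [] if all else default
            return ans if all else ans[-1]

        def pretty(self, leading_whitespace=''):
            return leading_whitespace + ('\n' + leading_whitespace).join(
                '%s: %s' % (k, v) for k, v in self.items())

    d = ToyMultiDict()
    d.add('Accept-Encoding', 'gzip')
    d.add('Accept-Encoding', 'identity')
    # d.get('Accept-Encoding') == 'identity'
    # d.get('Accept-Encoding', all=True) == ['gzip', 'identity']
    # d.pretty('\t') == "\tAccept-Encoding: ['gzip', 'identity']"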