diff --git a/src/calibre/srv/errors.py b/src/calibre/srv/errors.py index 64dfd0ee61..f0ad28cae4 100644 --- a/src/calibre/srv/errors.py +++ b/src/calibre/srv/errors.py @@ -17,6 +17,8 @@ class MaxSizeExceeded(Exception): def __init__(self, prefix, size, limit): Exception.__init__(self, prefix + (' %d > maximum %d' % (size, limit))) + self.size = size + self.limit = limit class HTTP404(Exception): pass @@ -25,3 +27,6 @@ class IfNoneMatch(Exception): def __init__(self, etag=None): Exception.__init__(self, '') self.etag = etag + +class BadChunkedInput(ValueError): + pass diff --git a/src/calibre/srv/http.py b/src/calibre/srv/http.py index 3aa3aeff00..578475f626 100644 --- a/src/calibre/srv/http.py +++ b/src/calibre/srv/http.py @@ -16,7 +16,7 @@ from operator import itemgetter from calibre import as_unicode from calibre.constants import __version__ from calibre.srv.errors import ( - MaxSizeExceeded, NonHTTPConnRequest, HTTP404, IfNoneMatch) + MaxSizeExceeded, NonHTTPConnRequest, HTTP404, IfNoneMatch, BadChunkedInput) from calibre.srv.respond import finalize_output, generate_static_output from calibre.srv.utils import MultiDict, http_date @@ -146,6 +146,9 @@ def read_headers(readline): # {{{ def http_communicate(conn): ' Represents interaction with a http client over a single, persistent connection ' request_seen = False + def repr_for_pair(pair): + return pair.repr_for_log() if getattr(pair, 'started_request', False) else 'None' + try: while True: # (re)set pair to None so that if something goes wrong in @@ -181,13 +184,25 @@ def http_communicate(conn): except socket.error: # This socket is broken. Log the error and close connection conn.server_loop.log.exception( - 'Communication failed while processing request:', pair.repr_for_log() if getattr(pair, 'started_request', False) else 'None') + 'Communication failed (socket error) while processing request:', repr_for_pair(pair)) + except MaxSizeExceeded as e: + conn.server_loop.log.warn('Too large request body (%d > %d) for request:' % (e.size, e.limit), repr_for_pair(pair)) + # Can happen if the request uses chunked transfer encoding + if pair and not pair.sent_headers: + pair.simple_response(httplib.REQUEST_ENTITY_TOO_LARGE, + "The entity sent with the request exceeds the maximum " + "allowed bytes (%d)." % pair.max_request_body_size) + except BadChunkedInput as e: + conn.server_loop.log.warn('Bad chunked encoding (%s) for request:' % as_unicode(e.message), repr_for_pair(pair)) + if pair and not pair.sent_headers: + pair.simple_response(httplib.BAD_REQUEST, + 'Invalid chunked encoding for request body: %s' % as_unicode(e.message)) except Exception: conn.server_loop.log.exception('Error serving request:', pair.repr_for_log() if getattr(pair, 'started_request', False) else 'None') if pair and not pair.sent_headers: pair.simple_response(httplib.INTERNAL_SERVER_ERROR) -class FixedSizeReader(object): +class FixedSizeReader(object): # {{{ def __init__(self, socket_file, content_length): self.socket_file, self.remaining = socket_file, content_length @@ -201,9 +216,9 @@ class FixedSizeReader(object): data = self.socket_file.read(size) self.remaining -= len(data) return data +# }}} - -class ChunkedReader(object): +class ChunkedReader(object): # {{{ def __init__(self, socket_file, maxsize): self.socket_file, self.maxsize = socket_file, maxsize @@ -225,14 +240,14 @@ class ChunkedReader(object): try: chunk_size = int(line, 16) + 2 except Exception: - raise ValueError('%s is not a valid chunk size' % reprlib.repr(chunk_size)) + raise BadChunkedInput('%s is not a valid chunk size' % reprlib.repr(chunk_size)) if chunk_size + self.bytes_read > self.maxsize: raise MaxSizeExceeded('Request entity too large', self.bytes_read + chunk_size, self.maxsize) chunk = self.socket_file.read(chunk_size) if len(chunk) < chunk_size: - raise ValueError('Bad chunked encoding, chunk truncated: %d < %s' % (len(chunk), chunk_size)) + raise BadChunkedInput('Bad chunked encoding, chunk truncated: %d < %s' % (len(chunk), chunk_size)) if not chunk.endswith(b'\r\n'): - raise ValueError('Bad chunked encoding: %r != CRLF' % chunk[:-2]) + raise BadChunkedInput('Bad chunked encoding: %r != CRLF' % chunk[:-2]) self.rbuf.seek(0, os.SEEK_END) self.bytes_read += chunk_size if chunk_size == 2: @@ -260,7 +275,7 @@ class ChunkedReader(object): self.rbuf.write(data[size:]) return data[:size] return data - +# }}} class HTTPPair(object): @@ -269,8 +284,8 @@ class HTTPPair(object): def __init__(self, handle_request, conn): self.conn = conn self.server_loop = conn.server_loop - self.max_header_line_size = self.server_loop.opts.max_header_line_size * 1024 - self.max_request_body_size = self.server_loop.opts.max_request_body_size * 1024 * 1024 + self.max_header_line_size = int(self.server_loop.opts.max_header_line_size * 1024) + self.max_request_body_size = int(self.server_loop.opts.max_request_body_size * 1024 * 1024) self.scheme = 'http' if self.server_loop.ssl_context is None else 'https' self.inheaders = MultiDict() self.outheaders = MultiDict() @@ -292,7 +307,7 @@ class HTTPPair(object): self.close_connection = False self.started_request = False - self.reponse_protocol = HTTP1 + self.response_protocol = HTTP1 self.status_code = None self.sent_headers = False @@ -305,7 +320,8 @@ class HTTPPair(object): try: if not self.read_request_line(): return - except MaxSizeExceeded: + except MaxSizeExceeded as e: + self.server_loop.log.warn('Too large request URI (%d > %d), dropping connection' % (e.size, e.limit)) self.simple_response( httplib.REQUEST_URI_TOO_LONG, "The Request-URI sent with the request exceeds the maximum allowed bytes.") @@ -314,7 +330,8 @@ class HTTPPair(object): try: if not self.read_request_headers(): return - except MaxSizeExceeded: + except MaxSizeExceeded as e: + self.server_loop.log.warn('Too large header (%d > %d) for request, dropping connection' % (e.size, e.limit)) self.simple_response( httplib.REQUEST_ENTITY_TOO_LARGE, "The headers sent with the request exceed the maximum allowed bytes.") @@ -358,6 +375,7 @@ class HTTPPair(object): except KeyError: self.simple_response(httplib.HTTP_VERSION_NOT_SUPPORTED) return False + self.response_protocol = protocol_map[min((1, 1), rp)] scheme, authority, path = parse_request_uri(uri) if b'#' in path: @@ -386,9 +404,7 @@ class HTTPPair(object): except ValueError as e: self.simple_response(httplib.BAD_REQUEST, as_unicode(e)) return False - self.path = tuple(x.replace('%2F', '/') for x in path.split('/')) - - self.response_protocol = protocol_map[min((1, 1), rp)] + self.path = tuple(filter(None, (x.replace('%2F', '/') for x in path.split('/')))) return True @@ -447,18 +463,18 @@ class HTTPPair(object): abort = status_code in (httplib.REQUEST_ENTITY_TOO_LARGE, httplib.REQUEST_URI_TOO_LONG) if abort: self.close_connection = True - if self.reponse_protocol is HTTP1: + if self.response_protocol is HTTP1: # HTTP/1.0 has no 413/414 codes status_code = httplib.BAD_REQUEST msg = msg.encode('utf-8') buf = [ - '%s %d %s' % (self.reponse_protocol, status_code, httplib.responses[status_code]), + '%s %d %s' % (self.response_protocol, status_code, httplib.responses[status_code]), "Content-Length: %s" % len(msg), "Content-Type: text/plain; charset=UTF-8", "Date: " + http_date(), ] - if abort and self.reponse_protocol is HTTP11: + if abort and self.response_protocol is HTTP11: buf.append("Connection: close") buf.append('') buf = [(x + '\r\n').encode('ascii') for x in buf] @@ -470,7 +486,7 @@ class HTTPPair(object): def send_not_modified(self, etag=None): buf = [ - '%s %d %s' % (self.reponse_protocol, httplib.NOT_MODIFIED, httplib.responses[httplib.NOT_MODIFIED]), + '%s %d %s' % (self.response_protocol, httplib.NOT_MODIFIED, httplib.responses[httplib.NOT_MODIFIED]), "Content-Length: 0", "Date: " + http_date(), ] @@ -489,9 +505,16 @@ class HTTPPair(object): self.conn.socket_file.flush() def repr_for_log(self): - return 'HTTPPair: %r\nPath:%r\nQuery:\n%s\nIn Headers:\n%s\nOut Headers:\n%s' % ( - self.request_line, self.path, self.qs.pretty('\t'), self.inheaders.pretty('\t'), self.outheaders.pretty('\t') - ) + ans = ['HTTPPair: %r' % self.request_line] + if self.path: + ans.append('Path: %r' % (self.path,)) + if self.qs: + ans.append('Query: %r' % self.qs) + if self.inheaders: + ans.extend(('In Headers:', self.inheaders.pretty('\t'))) + if self.outheaders: + ans.extend(('Out Headers:', self.outheaders.pretty('\t'))) + return '\n'.join(ans) def generate_static_output(self, name, generator): return generate_static_output(self.server_loop.gso_cache, self.server_loop.gso_lock, name, generator) @@ -532,7 +555,7 @@ class HTTPPair(object): self.outheaders.set('Date', http_date(), replace_all=True) self.outheaders.set('Server', 'calibre %s' % __version__, replace_all=True) if 'Connection' not in self.outheaders: - if self.reponse_protocol is HTTP11: + if self.response_protocol is HTTP11: if self.close_connection: self.outheaders.set('Connection', 'close') else: diff --git a/src/calibre/srv/loop.py b/src/calibre/srv/loop.py index 9ce205db1c..66dccdc057 100644 --- a/src/calibre/srv/loop.py +++ b/src/calibre/srv/loop.py @@ -444,6 +444,10 @@ class ThreadPool(object): # {{{ def idle(self): return sum(int(not w.serving) for w in self._threads) + @property + def busy(self): + return sum(int(w.serving) for w in self._threads) + def put(self, obj): self._queue.put(obj, block=True, timeout=self._queue_put_timeout) diff --git a/src/calibre/srv/opts.py b/src/calibre/srv/opts.py index 38cfc8ed90..1fa5ef079d 100644 --- a/src/calibre/srv/opts.py +++ b/src/calibre/srv/opts.py @@ -52,11 +52,11 @@ raw_options = ( None, 'Max. size of single HTTP header (in KB)', - 'max_header_line_size', 8, + 'max_header_line_size', 8.0, None, 'Max. size of a HTTP request (in MB)', - 'max_request_body_size', 500, + 'max_request_body_size', 500.0, None, 'Decrease latency by using the TCP_NODELAY feature', diff --git a/src/calibre/srv/tests/base.py b/src/calibre/srv/tests/base.py index 292f68757f..9f534904c9 100644 --- a/src/calibre/srv/tests/base.py +++ b/src/calibre/srv/tests/base.py @@ -32,16 +32,18 @@ class TestServer(Thread): daemon = True - def __init__(self, handler): + def __init__(self, handler, **kwargs): Thread.__init__(self, name='ServerMain') from calibre.srv.opts import Options from calibre.srv.loop import ServerLoop from calibre.srv.http import create_http_handler + kwargs['shutdown_timeout'] = kwargs.get('shutdown_timeout', 0.1) self.loop = ServerLoop( - opts=Options(shutdown_timeout=0.1), + opts=Options(**kwargs), bind_address=('localhost', 0), http_handler=create_http_handler(handler), log=TestLog(level=ThreadSafeLog.WARN), ) + self.log = self.loop.log def run(self): try: diff --git a/src/calibre/srv/tests/http.py b/src/calibre/srv/tests/http.py index 7368cef0dc..efe527abb5 100644 --- a/src/calibre/srv/tests/http.py +++ b/src/calibre/srv/tests/http.py @@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2015, Kovid Goyal ' -import textwrap, httplib, socket +import textwrap, httplib from io import BytesIO from calibre.srv.tests.base import BaseTest, TestServer @@ -64,7 +64,7 @@ class TestHTTP(BaseTest): body = 'Requested resource not found' def handler(conn): raise HTTP404(body) - with TestServer(handler) as server: + with TestServer(handler, max_header_line_size=100./1024, max_request_body_size=100./(1024*1024)) as server: # Test 404 conn = server.connect() conn.request('HEAD', '/moose') @@ -80,10 +80,14 @@ class TestHTTP(BaseTest): self.ae(r.status, httplib.NOT_FOUND) self.ae(r.read(), 'Requested resource not found') - server.change_handler(lambda conn:conn.path[1] + conn.input_reader.read().decode('ascii')) + server.change_handler(lambda conn:conn.path[0] + conn.input_reader.read().decode('ascii')) + conn = server.connect() + # Test simple GET - conn.request('GET', '/test') - self.ae(conn.getresponse().read(), 'test') + conn.request('GET', '/test/') + r = conn.getresponse() + self.ae(r.status, httplib.OK) + self.ae(r.read(), 'test') # Test POST with simple body conn.request('POST', '/test', 'body') @@ -98,6 +102,24 @@ class TestHTTP(BaseTest): self.ae(r.status, httplib.CREATED) self.ae(r.read(), 'testbody') + # Test various incorrect input + orig_level, server.log.filter_level = server.log.filter_level, server.log.ERROR + + conn.request('GET', '/test' + ('a' * 200)) + r = conn.getresponse() + self.ae(r.status, httplib.BAD_REQUEST) + + conn.request('GET', '/test', ('a' * 200)) + r = conn.getresponse() + self.ae(r.status, httplib.REQUEST_ENTITY_TOO_LARGE) + + conn.request('POST', '/test', headers={'Transfer-Encoding': 'chunked'}) + conn.send(b'x\r\nbody\r\n0\r\n\r\n') + r = conn.getresponse() + self.ae(r.status, httplib.BAD_REQUEST) + + server.log.filter_level = orig_level + conn = server.connect() # Test pipelining responses = [] for i in xrange(10): @@ -112,10 +134,11 @@ class TestHTTP(BaseTest): # Test closing conn.request('GET', '/close', headers={'Connection':'close'}) + self.ae(server.loop.requests.busy, 1) r = conn.getresponse() self.ae(r.status, 200), self.ae(r.read(), 'close') - conn.request('HEAD', '/close') - with self.assertRaises(socket.error): - conn.sock.send(b'xxx') + self.ae(server.loop.requests.busy, 0) + self.assertIsNone(conn.sock) + self.ae(server.loop.requests.idle, 10) # }}}