diff --git a/src/calibre/srv/http.py b/src/calibre/srv/http.py
index 1e7366a201..dcab81b237 100644
--- a/src/calibre/srv/http.py
+++ b/src/calibre/srv/http.py
@@ -6,19 +6,155 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2015, Kovid Goyal '
 
-import httplib, socket
+import httplib, socket, re
+from urllib import unquote
+from urlparse import parse_qs
+from functools import partial
 
+from calibre import as_unicode
 from calibre.srv.errors import MaxSizeExceeded, NonHTTPConnRequest
 
 HTTP1 = 'HTTP/1.0'
 HTTP11 = 'HTTP/1.1'
+protocol_map = {(1, 0):HTTP1, (1, 1):HTTP11}
+quoted_slash = re.compile(br'%2[fF]')
+
+def parse_request_uri(uri):  # {{{
+    """Parse a Request-URI into (scheme, authority, path).
+
+    Note that Request-URI's must be one of::
+
+        Request-URI = "*" | absoluteURI | abs_path | authority
+
+    Therefore, a Request-URI which starts with a double forward-slash
+    cannot be a "net_path"::
+
+        net_path = "//" authority [ abs_path ]
+
+    Instead, it must be interpreted as an "abs_path" with an empty first
+    path segment::
+
+        abs_path = "/" path_segments
+        path_segments = segment *( "/" segment )
+        segment = *pchar *( ";" param )
+        param = *pchar
+
+    Components that are absent from the given form are returned as None:
+    "*" and an abs_path yield (None, None, uri), a bare authority yields
+    (None, uri, None). An absoluteURI without a path component (for
+    example ``http://host``) yields a path of ``/``.
+    """
+    if uri == b'*':
+        return None, None, uri
+
+    i = uri.find(b'://')
+    if i > 0 and b'?' not in uri[:i]:
+        # An absoluteURI.
+        # If there's a scheme (and it must be http or https), then:
+        # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query
+        # ]]
+        scheme, remainder = uri[:i].lower(), uri[i + 3:]
+        # partition() rather than split(): an absoluteURI with no path
+        # (no "/" after the authority) must not raise ValueError here.
+        authority, _, path = remainder.partition(b'/')
+        path = b'/' + path
+        return scheme, authority, path
+
+    if uri.startswith(b'/'):
+        # An abs_path.
+        return None, None, uri
+    else:
+        # An authority.
+        return None, uri, None
+# }}}
+
+comma_separated_headers = {
+    b'Accept', b'Accept-Charset', b'Accept-Encoding',
+    b'Accept-Language', b'Accept-Ranges', b'Allow', b'Cache-Control',
+    b'Connection', b'Content-Encoding', b'Content-Language', b'Expect',
+    b'If-Match', b'If-None-Match', b'Pragma', b'Proxy-Authenticate', b'TE',
+    b'Trailer', b'Transfer-Encoding', b'Upgrade', b'Vary', b'Via', b'Warning',
+    b'WWW-Authenticate'
+}
+
+
+def read_headers(readline, max_line_size, hdict=None):  # {{{
+    """
+    Read headers from the given stream into the given header dict.
+
+    If hdict is None, a new header dict is created. Returns the populated
+    header dict.
+
+    Headers which are repeated are folded together using a comma if their
+    specification so dictates. A continuation line (one starting with a
+    space or tab) is appended, separated by a single space, to the value of
+    the header line that precedes it.
+
+    readline is called with no arguments and must return the next raw line,
+    including its CRLF terminator, or an empty value at end of stream.
+    max_line_size is not consulted here; any limit on line length has to be
+    enforced by readline itself.
+
+    This function raises ValueError when the read bytes violate the HTTP spec.
+    You should probably return "400 Bad Request" if this happens.
+    """
+    if hdict is None:
+        hdict = {}
+    # Name of the most recently parsed header, needed for continuation lines
+    hname = None
+
+    while True:
+        line = readline()
+        if not line:
+            # No more data--illegal end of headers
+            raise ValueError("Illegal end of headers.")
+
+        if line == b'\r\n':
+            # Normal end of headers
+            break
+        if not line.endswith(b'\r\n'):
+            raise ValueError("HTTP requires CRLF terminators")
+
+        if line[0] in (b' ', b'\t'):
+            # It's a continuation line.
+            if hname is None:
+                # Nothing to continue: the very first line was indented
+                raise ValueError("Illegal continuation line.")
+            v = line.strip()
+            sep = b' '
+        else:
+            try:
+                k, v = line.split(b':', 1)
+            except ValueError:
+                raise ValueError("Illegal header line.")
+            k = k.strip().title()
+            v = v.strip()
+            hname = k.decode('ascii')
+            sep = b', ' if k in comma_separated_headers else None
+
+        if sep is not None:
+            existing = hdict.get(hname)
+            if existing:
+                v = sep.join((existing, v))
+        try:
+            v = v.decode('ascii')
+        except UnicodeDecodeError:
+            # Non-ASCII values are kept as raw bytes, except for the headers
+            # that control connection handling, which must be decodable.
+            if hname in ('Transfer-Encoding', 'Connection', 'Keep-Alive', 'Expect'):
+                raise
+        hdict[hname] = v
+
+    return hdict
+# }}}
 
 def http_communicate(conn):
+    ' Represents interaction with a http client over a single, persistent connection '
     request_seen = False
     try:
         while True:
             # (re)set req to None so that if something goes wrong in
-            # the RequestHandlerClass constructor, the error doesn't
+            # the HTTPPair constructor, the error doesn't
             # get written to the previous request.
             req = None
             req = conn.server_loop.http_handler(conn)
@@ -60,7 +196,8 @@ class HTTPPair(object):
     def __init__(self, conn):
         self.conn = conn
         self.server_loop = conn.server_loop
-        self.scheme = b'http' if self.server_loop.ssl_context is None else b'https'
+        self.max_header_line_size = self.server_loop.max_header_line_size
+        self.scheme = 'http' if self.server_loop.ssl_context is None else 'https'
         self.inheaders = {}
         self.outheaders = []
 
@@ -103,6 +240,154 @@ class HTTPPair(object):
 
         self.ready = True
 
+    def read_request_line(self):
+        """
+        Read and parse the Request-Line from the connection.
+
+        On success this sets self.method, self.request_protocol, self.qs,
+        self.path and self.response_protocol (plus self.scheme when the
+        Request-URI carries one) and returns True. Returns False when no
+        request line could be read or when the line was rejected; in the
+        latter case an error response has already been issued through
+        simple_response().
+        """
+        request_line = self.conn.socket_file.readline(maxsize=self.max_header_line_size)
+
+        # Set started_request to True so http_communicate() knows to send 408
+        # from here on out.
+        self.started_request = True
+        if not request_line:
+            return False
+
+        if request_line == b'\r\n':
+            # RFC 2616 sec 4.1: "...if the server is reading the protocol
+            # stream at the beginning of a message and receives a CRLF
+            # first, it should ignore the CRLF."
+            # But only ignore one leading line! else we enable a DoS.
+            request_line = self.conn.socket_file.readline(maxsize=self.max_header_line_size)
+            if not request_line:
+                return False
+
+        if not request_line.endswith(b'\r\n'):
+            self.simple_response(
+                httplib.BAD_REQUEST, 'Bad Request', "HTTP requires CRLF terminators")
+            return False
+
+        try:
+            method, uri, req_protocol = request_line.strip().split(b' ', 2)
+            rp = int(req_protocol[5]), int(req_protocol[7])
+            self.method = method.decode('ascii')
+        except (ValueError, IndexError):
+            self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Malformed Request-Line")
+            return False
+
+        try:
+            self.request_protocol = protocol_map[rp]
+        except KeyError:
+            self.simple_response(httplib.HTTP_VERSION_NOT_SUPPORTED, "HTTP Version Not Supported")
+            return False
+
+        scheme, authority, path = parse_request_uri(uri)
+        if path is None:
+            # Authority-form Request-URI (host[:port] only): there is no
+            # path to serve, and the checks below need a byte string.
+            self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Request-URI has no path")
+            return False
+        if b'#' in path:
+            self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Illegal #fragment in Request-URI.")
+            return False
+
+        if scheme:
+            try:
+                self.scheme = scheme.decode('ascii')
+            except ValueError:
+                self.simple_response(httplib.BAD_REQUEST, "Bad Request", 'Un-decodeable scheme')
+                return False
+
+        qs = b''
+        if b'?' in path:
+            path, qs = path.split(b'?', 1)
+        try:
+            self.qs = {k.decode('utf-8'):tuple(x.decode('utf-8') for x in v) for k, v in parse_qs(qs, keep_blank_values=True).iteritems()}
+        except Exception:
+            self.simple_response(httplib.BAD_REQUEST, "Bad Request", 'Unparseable query string')
+            return False
+
+        try:
+            path = '%2F'.join(unquote(x).decode('utf-8') for x in quoted_slash.split(path))
+        except ValueError as e:
+            self.simple_response(httplib.BAD_REQUEST, "Bad Request", as_unicode(e))
+            return False
+        self.path = tuple(x.replace('%2F', '/') for x in path.split('/'))
+
+        self.response_protocol = protocol_map[min((1, 1), rp)]
+
+        return True
+
+    def read_request_headers(self):
+        """
+        Read the request headers into self.inheaders and act on them.
+
+        Sets self.close_connection (only ever to True) and self.chunked_read
+        from the Connection and Transfer-Encoding headers and answers an
+        "Expect: 100-continue" with an interim 100 response. Returns True when
+        the request may proceed, False after an error response was sent.
+        """
+        # then all the http headers
+        try:
+            read_headers(
+                partial(self.conn.socket_file.readline, maxsize=self.max_header_line_size),
+                self.max_header_line_size, hdict=self.inheaders)
+            content_length = int(self.inheaders.get('Content-Length', 0))
+        except ValueError as e:
+            self.simple_response(httplib.BAD_REQUEST, "Bad Request", as_unicode(e))
+            return False
+
+        if content_length > self.server_loop.max_request_body_size:
+            self.simple_response(
+                httplib.REQUEST_ENTITY_TOO_LARGE, "Request Entity Too Large",
+                "The entity sent with the request exceeds the maximum "
+                "allowed bytes (%d)." % self.server_loop.max_request_body_size)
+            return False
+
+        # Persistent connection support
+        # Connection options are case-insensitive and may be a comma
+        # separated list, so compare lower-cased tokens
+        conn_tokens = {x.strip().lower() for x in self.inheaders.get("Connection", "").split(",")}
+        if self.response_protocol == HTTP11:
+            # Both server and client are HTTP/1.1
+            if "close" in conn_tokens:
+                self.close_connection = True
+        else:
+            # Either the server or client (or both) are HTTP/1.0
+            if "keep-alive" not in conn_tokens:
+                self.close_connection = True
+
+        # Transfer-Encoding support
+        te = ()
+        if self.response_protocol == HTTP11:
+            rte = self.inheaders.get("Transfer-Encoding")
+            if rte:
+                te = [x.strip().lower() for x in rte.split(",") if x.strip()]
+        self.chunked_read = False
+        if te:
+            for enc in te:
+                if enc == "chunked":
+                    self.chunked_read = True
+                else:
+                    # Note that, even if we see "chunked", we must reject
+                    # if there is an extension we don't recognize.
+                    self.simple_response(httplib.NOT_IMPLEMENTED, "Not Implemented", "Unknown transfer encoding: %s" % enc)
+                    self.close_connection = True
+                    return False
+
+        if self.inheaders.get("Expect", '').lower() == "100-continue":
+            # Don't use simple_response here, because it emits headers
+            # we don't want.
+            msg = HTTP11 + " 100 Continue\r\n\r\n"
+            self.flushed_write(msg.encode('ascii'))
+        return True
+
     def simple_response(self, status_code, status_text, msg=""):
         abort = status_code in (httplib.REQUEST_ENTITY_TOO_LARGE, httplib.REQUEST_URI_TOO_LONG)
         if abort:
diff --git a/src/calibre/srv/loop.py b/src/calibre/srv/loop.py
index 9918697e84..ec858bbcfc 100644
--- a/src/calibre/srv/loop.py
+++ b/src/calibre/srv/loop.py
@@ -546,6 +546,12 @@ class ServerLoop(object):
                  # socket activation
                  allow_socket_preallocation=True,
 
+                 # Max. size of single header
+                 max_header_line_size=8192,  # 8 KB
+
+                 # Max. size of a request
+                 max_request_body_size=500 * 1024 * 1024,
+
                  # no_delay turns on TCP_NODELAY which decreases latency at the cost of
                  # worse overall performance when sending multiple small packets. It
                  # prevents the TCP stack from aggregating multiple small TCP packets.
@@ -568,6 +574,8 @@ class ServerLoop(object):
         self.no_delay = no_delay
         self.request_queue_size = request_queue_size
         self.timeout = timeout
+        self.max_header_line_size = max_header_line_size
+        self.max_request_body_size = max_request_body_size
         self.shutdown_timeout = shutdown_timeout
         ba = bind_address
         if not isinstance(ba, basestring):