mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement parsing of HTTP requests
This commit is contained in:
parent
1617721b99
commit
656d0a1c10
@ -6,19 +6,132 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import httplib, socket
|
||||
import httplib, socket, re
|
||||
from urllib import unquote
|
||||
from urlparse import parse_qs
|
||||
from functools import partial
|
||||
|
||||
from calibre import as_unicode
|
||||
from calibre.srv.errors import MaxSizeExceeded, NonHTTPConnRequest
|
||||
|
||||
# Protocol version strings as they appear on the wire in status lines.
HTTP1 = 'HTTP/1.0'
HTTP11 = 'HTTP/1.1'
# Maps the (major, minor) version tuple parsed from the Request-Line to the
# canonical protocol string; a version missing from this map is answered
# with 505 HTTP Version Not Supported (see read_request_line below).
protocol_map = {(1, 0):HTTP1, (1, 1):HTTP11}
# Matches a percent-encoded forward slash (%2F / %2f) in a raw path, so that
# encoded slashes can be preserved while the rest of the path is unquoted.
quoted_slash = re.compile(br'%2[fF]')
|
||||
|
||||
def parse_request_uri(uri):  # {{{
    """Parse a Request-URI into (scheme, authority, path).

    Note that Request-URI's must be one of::

        Request-URI = "*" | absoluteURI | abs_path | authority

    Therefore, a Request-URI which starts with a double forward-slash
    cannot be a "net_path"::

        net_path = "//" authority [ abs_path ]

    Instead, it must be interpreted as an "abs_path" with an empty first
    path segment::

        abs_path = "/" path_segments
        path_segments = segment *( "/" segment )
        segment = *pchar *( ";" param )
        param = *pchar

    :param uri: the raw Request-URI as bytes
    :return: a 3-tuple (scheme, authority, path); components absent from
        the URI are None (for the "*" form the path is the literal b'*')
    """
    if uri == b'*':
        # The asterisk form, used by OPTIONS requests for the server itself
        return None, None, uri

    i = uri.find(b'://')
    if i > 0 and b'?' not in uri[:i]:
        # An absoluteURI.
        # If there's a scheme (and it must be http or https), then:
        # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query
        # ]]
        scheme, remainder = uri[:i].lower(), uri[i + 3:]
        # The abs_path is optional in an http_URL; an absent path is
        # equivalent to the root path "/" (RFC 2616 sec 3.2.2). The
        # unconditional split() used previously raised ValueError for
        # URIs such as b'http://example.com'.
        if b'/' in remainder:
            authority, path = remainder.split(b'/', 1)
            path = b'/' + path
        else:
            authority, path = remainder, b'/'
        return scheme, authority, path

    if uri.startswith(b'/'):
        # An abs_path.
        return None, None, uri
    else:
        # An authority (used only by CONNECT requests).
        return None, uri, None
# }}}
|
||||
|
||||
# Headers that may legitimately occur more than once in a request; repeated
# occurrences are folded into a single value joined by ", " (RFC 2616 sec 4.2).
comma_separated_headers = set(hn.encode('ascii') for hn in (
    'Accept', 'Accept-Charset', 'Accept-Encoding',
    'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control',
    'Connection', 'Content-Encoding', 'Content-Language', 'Expect',
    'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE',
    'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning',
    'WWW-Authenticate',
))
|
||||
|
||||
|
||||
def read_headers(readline, max_line_size, hdict=None):  # {{{
    """
    Read headers from the given stream into the given header dict.

    If hdict is None, a new header dict is created. Returns the populated
    header dict.

    Headers which are repeated are folded together using a comma if their
    specification so dictates.

    This function raises ValueError when the read bytes violate the HTTP spec.
    You should probably return "400 Bad Request" if this happens.

    :param readline: a callable returning one raw header line (bytes) per
        call; it is expected to enforce max_line_size itself (the parameter
        is kept in the signature for callers that pass it)
    :param hdict: optional dict to populate; keys are title-cased unicode
        header names, values are (usually) unicode header values
    """
    if hdict is None:
        hdict = {}
    # Name of the most recently seen header, needed to attribute
    # continuation lines; None until the first real header line.
    current_key = None

    while True:
        line = readline()
        if not line:
            # No more data--illegal end of headers
            raise ValueError("Illegal end of headers.")

        if line == b'\r\n':
            # Normal end of headers
            break
        if not line.endswith(b'\r\n'):
            raise ValueError("HTTP requires CRLF terminators")

        # line[:1] (not line[0]) so the comparison is a one-byte bytes
        # object under both Python 2 and 3.
        if line[:1] in (b' ', b'\t'):
            # It's a continuation line.
            if current_key is None:
                # A continuation before any header line is malformed.
                raise ValueError("Illegal continuation line.")
            k = current_key
            v = line.strip()
        else:
            try:
                k, v = line.split(b':', 1)
            except ValueError:
                raise ValueError("Illegal header line.")
            k = k.strip().title()
            v = v.strip()
        hname = k.decode('ascii')
        current_key = k

        # Decode before folding so repeated headers are joined from values
        # of a single (text) type.
        try:
            v = v.decode('ascii')
        except UnicodeDecodeError:
            # NOTE: this is a substring test against one space-separated
            # string, matching the original code's behavior.
            if hname in 'Transfer-Encoding Connection Keep-Alive Expect':
                raise

        if k in comma_separated_headers:
            existing = hdict.get(hname)
            if existing:
                v = ", ".join((existing, v))
        hdict[hname] = v

    return hdict
# }}}
|
||||
|
||||
def http_communicate(conn):
|
||||
' Represents interaction with a http client over a single, persistent connection '
|
||||
request_seen = False
|
||||
try:
|
||||
while True:
|
||||
# (re)set req to None so that if something goes wrong in
|
||||
# the RequestHandlerClass constructor, the error doesn't
|
||||
# the HTTPPair constructor, the error doesn't
|
||||
# get written to the previous request.
|
||||
req = None
|
||||
req = conn.server_loop.http_handler(conn)
|
||||
@ -60,7 +173,8 @@ class HTTPPair(object):
|
||||
def __init__(self, conn):
|
||||
self.conn = conn
|
||||
self.server_loop = conn.server_loop
|
||||
self.scheme = b'http' if self.server_loop.ssl_context is None else b'https'
|
||||
self.max_header_line_size = self.server_loop.max_header_line_size
|
||||
self.scheme = 'http' if self.server_loop.ssl_context is None else 'https'
|
||||
self.inheaders = {}
|
||||
self.outheaders = []
|
||||
|
||||
@ -103,6 +217,127 @@ class HTTPPair(object):
|
||||
|
||||
self.ready = True
|
||||
|
||||
def read_request_line(self):
    """Read and parse the HTTP Request-Line.

    Returns True if the line was successfully parsed (setting self.method,
    self.request_protocol, self.response_protocol, self.path, self.qs and
    possibly self.scheme); returns False after having sent an appropriate
    error response (or on EOF, when nothing is sent).
    """
    request_line = self.conn.socket_file.readline(maxsize=self.max_header_line_size)

    # Set started_request to True so http_communicate() knows to send 408
    # from here on out.
    self.started_request = True
    if not request_line:
        return False

    if request_line == b'\r\n':
        # RFC 2616 sec 4.1: "...if the server is reading the protocol
        # stream at the beginning of a message and receives a CRLF
        # first, it should ignore the CRLF."
        # But only ignore one leading line! else we enable a DoS.
        request_line = self.conn.socket_file.readline(maxsize=self.max_header_line_size)
        if not request_line:
            return False

    if not request_line.endswith(b'\r\n'):
        self.simple_response(
            httplib.BAD_REQUEST, 'Bad Request', "HTTP requires CRLF terminators")
        return False

    try:
        method, uri, req_protocol = request_line.strip().split(b' ', 2)
        # req_protocol is b'HTTP/x.y'; indices 5 and 7 are the version digits
        rp = int(req_protocol[5]), int(req_protocol[7])
        self.method = method.decode('ascii')
    except (ValueError, IndexError):
        self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Malformed Request-Line")
        return False

    try:
        self.request_protocol = protocol_map[rp]
    except KeyError:
        self.simple_response(httplib.HTTP_VERSION_NOT_SUPPORTED, "HTTP Version Not Supported")
        return False

    # parse_request_uri() may raise ValueError on a malformed absoluteURI;
    # answer with 400 instead of crashing the connection handler.
    try:
        scheme, authority, path = parse_request_uri(uri)
    except ValueError:
        self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Malformed Request-URI")
        return False
    if path is None:
        # The authority form (used only by CONNECT) yields path == None;
        # previously this fell through to `b'#' in path` and raised
        # TypeError. This server does not implement CONNECT.
        self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Unsupported Request-URI form")
        return False
    if b'#' in path:
        self.simple_response(httplib.BAD_REQUEST, "Bad Request", "Illegal #fragment in Request-URI.")
        return False

    if scheme:
        try:
            self.scheme = scheme.decode('ascii')
        except ValueError:
            self.simple_response(httplib.BAD_REQUEST, "Bad Request", 'Un-decodeable scheme')
            return False

    qs = b''
    if b'?' in path:
        path, qs = path.split(b'?', 1)
        try:
            self.qs = {k.decode('utf-8'):tuple(x.decode('utf-8') for x in v) for k, v in parse_qs(qs, keep_blank_values=True).iteritems()}
        except Exception:
            # simple_response() accepts only (status_code, status_text, msg);
            # the original call passed a fourth argument, which would have
            # raised TypeError here.
            self.simple_response(httplib.BAD_REQUEST, "Bad Request", 'Unparseable query string')
            return False

    # Unquote the path, but preserve %2F (encoded slash) so it is not
    # confused with a path separator; the placeholder is restored per
    # segment below.
    try:
        path = '%2F'.join(unquote(x).decode('utf-8') for x in quoted_slash.split(path))
    except ValueError as e:
        self.simple_response(httplib.BAD_REQUEST, "Bad Request", as_unicode(e))
        return False
    self.path = tuple(x.replace('%2F', '/') for x in path.split('/'))

    # Respond with the lower of our protocol (1.1) and the client's.
    self.response_protocol = protocol_map[min((1, 1), rp)]

    return True
|
||||
|
||||
def read_request_headers(self):
    """Read and validate the request headers, populating self.inheaders.

    Also decides connection persistence (self.close_connection), chunked
    transfer decoding (self.chunked_read) and handles "Expect:
    100-continue". Returns True on success, False after having sent an
    error response.
    """
    # then all the http headers
    try:
        # read_headers' signature is (readline, max_line_size, hdict=None);
        # the original call passed self.inheaders as max_line_size, so the
        # populated dict was discarded and self.inheaders stayed empty.
        read_headers(
            partial(self.conn.socket_file.readline, maxsize=self.max_header_line_size),
            self.max_header_line_size, hdict=self.inheaders)
        content_length = int(self.inheaders.get('Content-Length', 0))
    except ValueError as e:
        self.simple_response(httplib.BAD_REQUEST, "Bad Request", as_unicode(e))
        return False

    if content_length > self.server_loop.max_request_body_size:
        self.simple_response(
            httplib.REQUEST_ENTITY_TOO_LARGE, "Request Entity Too Large",
            "The entity sent with the request exceeds the maximum "
            "allowed bytes (%d)." % self.server_loop.max_request_body_size)
        return False

    # Persistent connection support; the Connection token is
    # case-insensitive (RFC 2616 sec 14.10).
    if self.response_protocol is HTTP11:
        # Both server and client are HTTP/1.1
        if self.inheaders.get("Connection", "").lower() == "close":
            self.close_connection = True
    else:
        # Either the server or client (or both) are HTTP/1.0
        if self.inheaders.get("Connection", "").lower() != "keep-alive":
            self.close_connection = True

    # Transfer-Encoding support
    te = ()
    if self.response_protocol is HTTP11:
        rte = self.inheaders.get("Transfer-Encoding")
        if rte:
            te = [x.strip().lower() for x in rte.split(",") if x.strip()]
    self.chunked_read = False
    if te:
        for enc in te:
            if enc == "chunked":
                self.chunked_read = True
            else:
                # Note that, even if we see "chunked", we must reject
                # if there is an extension we don't recognize.
                self.simple_response(httplib.NOT_IMPLEMENTED, "Not Implemented", "Unknown transfer encoding: %s" % enc)
                self.close_connection = True
                return False

    if self.inheaders.get("Expect", '').lower() == "100-continue":
        # Don't use simple_response here, because it emits headers
        # we don't want.
        msg = HTTP11 + " 100 Continue\r\n\r\n"
        self.flushed_write(msg.encode('ascii'))
    return True
|
||||
|
||||
def simple_response(self, status_code, status_text, msg=""):
|
||||
abort = status_code in (httplib.REQUEST_ENTITY_TOO_LARGE, httplib.REQUEST_URI_TOO_LONG)
|
||||
if abort:
|
||||
|
@ -546,6 +546,12 @@ class ServerLoop(object):
|
||||
# socket activation
|
||||
allow_socket_preallocation=True,
|
||||
|
||||
# Max. size of single header
|
||||
max_header_line_size=8192, # 8 KB
|
||||
|
||||
# Max. size of a request
|
||||
max_request_body_size=500 * 1024 * 1024,
|
||||
|
||||
# no_delay turns on TCP_NODELAY which decreases latency at the cost of
|
||||
# worse overall performance when sending multiple small packets. It
|
||||
# prevents the TCP stack from aggregating multiple small TCP packets.
|
||||
@ -568,6 +574,8 @@ class ServerLoop(object):
|
||||
self.no_delay = no_delay
|
||||
self.request_queue_size = request_queue_size
|
||||
self.timeout = timeout
|
||||
self.max_header_line_size = max_header_line_size
|
||||
self.max_request_body_size = max_request_body_size
|
||||
self.shutdown_timeout = shutdown_timeout
|
||||
ba = bind_address
|
||||
if not isinstance(ba, basestring):
|
||||
|
Loading…
x
Reference in New Issue
Block a user