Small further optimization for readline()

This commit is contained in:
Kovid Goyal 2015-05-26 07:33:25 +05:30
parent d5d638dd9b
commit a43fdc9871
2 changed files with 27 additions and 11 deletions

View File

@ -13,7 +13,7 @@ from urllib import unquote
from calibre import as_unicode, force_unicode from calibre import as_unicode, force_unicode
from calibre.ptempfile import SpooledTemporaryFile from calibre.ptempfile import SpooledTemporaryFile
from calibre.srv.loop import Connection, READ, WRITE from calibre.srv.loop import Connection, READ, WRITE
from calibre.srv.utils import MultiDict, HTTP1, HTTP11 from calibre.srv.utils import MultiDict, HTTP1, HTTP11, Accumulator
protocol_map = {(1, 0):HTTP1, (1, 1):HTTP11} protocol_map = {(1, 0):HTTP1, (1, 1):HTTP11}
quoted_slash = re.compile(br'%2[fF]') quoted_slash = re.compile(br'%2[fF]')
@ -171,13 +171,12 @@ class HTTPRequest(Connection):
def readline(self, buf): def readline(self, buf):
line = self.read_buffer.readline() line = self.read_buffer.readline()
buf.write(line) buf.append(line)
if buf.tell() > self.max_header_line_size: if buf.total_length > self.max_header_line_size:
self.simple_response(self.header_line_too_long_error_code) self.simple_response(self.header_line_too_long_error_code)
return return
if line.endswith(b'\n'): if line.endswith(b'\n'):
line = buf.getvalue() line = buf.getvalue()
buf.seek(0), buf.truncate()
if not line.endswith(b'\r\n'): if not line.endswith(b'\r\n'):
self.simple_response(httplib.BAD_REQUEST, 'HTTP requires CRLF line terminators') self.simple_response(httplib.BAD_REQUEST, 'HTTP requires CRLF line terminators')
return return
@ -194,7 +193,7 @@ class HTTPRequest(Connection):
self.close_after_response = False self.close_after_response = False
self.header_line_too_long_error_code = httplib.REQUEST_URI_TOO_LONG self.header_line_too_long_error_code = httplib.REQUEST_URI_TOO_LONG
self.response_started = False self.response_started = False
self.set_state(READ, self.parse_request_line, BytesIO(), first=True) self.set_state(READ, self.parse_request_line, Accumulator(), first=True)
def parse_request_line(self, buf, event, first=False): # {{{ def parse_request_line(self, buf, event, first=False): # {{{
line = self.readline(buf) line = self.readline(buf)
@ -203,7 +202,7 @@ class HTTPRequest(Connection):
if line == b'\r\n': if line == b'\r\n':
# Ignore a single leading empty line, as per RFC 2616 sec 4.1 # Ignore a single leading empty line, as per RFC 2616 sec 4.1
if first: if first:
return self.set_state(READ, self.parse_request_line, BytesIO()) return self.set_state(READ, self.parse_request_line, Accumulator())
return self.simple_response(httplib.BAD_REQUEST, 'Multiple leading empty lines not allowed') return self.simple_response(httplib.BAD_REQUEST, 'Multiple leading empty lines not allowed')
try: try:
@ -246,7 +245,7 @@ class HTTPRequest(Connection):
self.path = tuple(filter(None, (x.replace('%2F', '/') for x in path.split('/')))) self.path = tuple(filter(None, (x.replace('%2F', '/') for x in path.split('/'))))
self.header_line_too_long_error_code = httplib.REQUEST_ENTITY_TOO_LARGE self.header_line_too_long_error_code = httplib.REQUEST_ENTITY_TOO_LARGE
self.request_line = line.rstrip() self.request_line = line.rstrip()
self.set_state(READ, self.parse_header_line, HTTPHeaderParser(), BytesIO()) self.set_state(READ, self.parse_header_line, HTTPHeaderParser(), Accumulator())
# }}} # }}}
@property @property
@ -310,7 +309,7 @@ class HTTPRequest(Connection):
def read_request_body(self, inheaders, request_content_length, chunked_read): def read_request_body(self, inheaders, request_content_length, chunked_read):
buf = SpooledTemporaryFile(prefix='rq-body-', max_size=DEFAULT_BUFFER_SIZE, dir=self.tdir) buf = SpooledTemporaryFile(prefix='rq-body-', max_size=DEFAULT_BUFFER_SIZE, dir=self.tdir)
if chunked_read: if chunked_read:
self.set_state(READ, self.read_chunk_length, inheaders, BytesIO(), buf, [0]) self.set_state(READ, self.read_chunk_length, inheaders, Accumulator(), buf, [0])
else: else:
if request_content_length > 0: if request_content_length > 0:
self.set_state(READ, self.sized_read, inheaders, buf, request_content_length) self.set_state(READ, self.sized_read, inheaders, buf, request_content_length)
@ -334,7 +333,7 @@ class HTTPRequest(Connection):
return self.simple_response(httplib.REQUEST_ENTITY_TOO_LARGE, return self.simple_response(httplib.REQUEST_ENTITY_TOO_LARGE,
'Chunked request is larger than %d bytes' % self.max_request_body_size) 'Chunked request is larger than %d bytes' % self.max_request_body_size)
if chunk_size == 0: if chunk_size == 0:
self.set_state(READ, self.read_chunk_separator, inheaders, BytesIO(), buf, bytes_read, last=True) self.set_state(READ, self.read_chunk_separator, inheaders, Accumulator(), buf, bytes_read, last=True)
else: else:
self.set_state(READ, self.read_chunk, inheaders, buf, chunk_size, buf.tell() + chunk_size, bytes_read) self.set_state(READ, self.read_chunk, inheaders, buf, chunk_size, buf.tell() + chunk_size, bytes_read)
@ -342,7 +341,7 @@ class HTTPRequest(Connection):
if not self.read(buf, end): if not self.read(buf, end):
return return
bytes_read[0] += chunk_size bytes_read[0] += chunk_size
self.set_state(READ, self.read_chunk_separator, inheaders, BytesIO(), buf, bytes_read) self.set_state(READ, self.read_chunk_separator, inheaders, Accumulator(), buf, bytes_read)
def read_chunk_separator(self, inheaders, line_buf, buf, bytes_read, event, last=False): def read_chunk_separator(self, inheaders, line_buf, buf, bytes_read, event, last=False):
line = self.readline(line_buf) line = self.readline(line_buf)
@ -357,7 +356,7 @@ class HTTPRequest(Connection):
if last: if last:
self.prepare_response(inheaders, buf) self.prepare_response(inheaders, buf)
else: else:
self.set_state(READ, self.read_chunk_length, inheaders, BytesIO(), buf, bytes_read) self.set_state(READ, self.read_chunk_length, inheaders, Accumulator(), buf, bytes_read)
def handle_timeout(self): def handle_timeout(self):
if self.response_started: if self.response_started:

View File

@ -219,3 +219,20 @@ class HandleInterrupt(object): # {{{
raise WindowsError() raise WindowsError()
# }}} # }}}
class Accumulator(object):
'Optimized replacement for BytesIO when the usage pattern is many writes followed by a single getvalue()'
def __init__(self):
self._buf = []
self.total_length = 0
def append(self, b):
self._buf.append(b)
self.total_length += len(b)
def getvalue(self):
ans = b''.join(self._buf)
self._buf = []
self.total_length = 0
return ans