CS: Implement access logging

This commit is contained in:
Kovid Goyal 2016-02-23 14:08:06 +05:30
parent 4ccb505fa4
commit 7f457019fb
5 changed files with 56 additions and 14 deletions

View File

@ -246,6 +246,7 @@ class HTTPRequest(Connection):
line = self.readline(buf) line = self.readline(buf)
if line is None: if line is None:
return return
self.request_line = line.rstrip()
if line == b'\r\n': if line == b'\r\n':
# Ignore a single leading empty line, as per RFC 2616 sec 4.1 # Ignore a single leading empty line, as per RFC 2616 sec 4.1
if first: if first:
@ -272,7 +273,6 @@ class HTTPRequest(Connection):
except HTTPSimpleResponse as e: except HTTPSimpleResponse as e:
return self.simple_response(e.http_code, e.message, close_after_response=False) return self.simple_response(e.http_code, e.message, close_after_response=False)
self.header_line_too_long_error_code = httplib.REQUEST_ENTITY_TOO_LARGE self.header_line_too_long_error_code = httplib.REQUEST_ENTITY_TOO_LARGE
self.request_line = line.rstrip()
self.set_state(READ, self.parse_header_line, HTTPHeaderParser(), Accumulator()) self.set_state(READ, self.parse_header_line, HTTPHeaderParser(), Accumulator())
# }}} # }}}

View File

@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import os, httplib, hashlib, uuid, struct, repr as reprlib import os, httplib, hashlib, uuid, struct, repr as reprlib, time
from collections import namedtuple from collections import namedtuple
from io import BytesIO, DEFAULT_BUFFER_SIZE from io import BytesIO, DEFAULT_BUFFER_SIZE
from itertools import chain, repeat, izip_longest from itertools import chain, repeat, izip_longest
@ -395,7 +395,9 @@ class HTTPConnection(HTTPRequest):
buf = [(x + '\r\n').encode('ascii') for x in buf] buf = [(x + '\r\n').encode('ascii') for x in buf]
if self.method != 'HEAD': if self.method != 'HEAD':
buf.append(msg) buf.append(msg)
self.response_ready(ReadOnlyFileBuffer(b''.join(buf))) response_data = b''.join(buf)
self.log_access(status_code=status_code, response_size=len(response_data))
self.response_ready(ReadOnlyFileBuffer(response_data))
def prepare_response(self, inheaders, request_body_file): def prepare_response(self, inheaders, request_body_file):
if self.method == 'TRACE': if self.method == 'TRACE':
@ -420,7 +422,9 @@ class HTTPConnection(HTTPRequest):
"Date: " + http_date(), "Date: " + http_date(),
"Content-Range: bytes */%d" % content_length, "Content-Range: bytes */%d" % content_length,
] ]
self.response_ready(header_list_to_file(buf)) response_data = header_list_to_file(buf)
self.log_access(status_code=httplib.REQUESTED_RANGE_NOT_SATISFIABLE, response_size=response_data.sz)
self.response_ready(response_data)
def send_not_modified(self, etag=None): def send_not_modified(self, etag=None):
buf = [ buf = [
@ -430,7 +434,9 @@ class HTTPConnection(HTTPRequest):
] ]
if etag is not None: if etag is not None:
buf.append('ETag: ' + etag) buf.append('ETag: ' + etag)
self.response_ready(header_list_to_file(buf)) response_data = header_list_to_file(buf)
self.log_access(status_code=httplib.NOT_MODIFIED, response_size=response_data.sz)
self.response_ready(response_data)
def report_busy(self): def report_busy(self):
self.simple_response(httplib.SERVICE_UNAVAILABLE) self.simple_response(httplib.SERVICE_UNAVAILABLE)
@ -482,7 +488,25 @@ class HTTPConnection(HTTPRequest):
x = x.decode('ascii') x = x.decode('ascii')
buf.append(x) buf.append(x)
buf.append('') buf.append('')
self.response_ready(ReadOnlyFileBuffer(b''.join((x + '\r\n').encode('ascii') for x in buf)), output=output) response_data = ReadOnlyFileBuffer(b''.join((x + '\r\n').encode('ascii') for x in buf))
if self.access_log is not None:
sz = outheaders.get('Content-Length')
if sz is not None:
sz = int(sz) + response_data.sz
self.log_access(status_code=data.status_code, response_size=sz, username=data.username)
self.response_ready(response_data, output=output)
def log_access(self, status_code, response_size=None, username=None):
if self.access_log is None:
return
if not self.opts.log_not_found and status_code == httplib.NOT_FOUND:
return
line = '%s port-%s %s %s "%s" %s %s' % (
self.remote_addr, self.remote_port, username or '-',
time.strftime('%d/%b/%Y:%H:%M:%S %z'),
force_unicode(self.request_line or '', 'utf-8'),
status_code, ('-' if response_size is None else response_size))
self.access_log(line)
def response_ready(self, header_file, output=None): def response_ready(self, header_file, output=None):
self.response_started = True self.response_started = True
@ -612,7 +636,7 @@ class HTTPConnection(HTTPRequest):
return self.send_range_not_satisfiable(output.content_length) return self.send_range_not_satisfiable(output.content_length)
for header in ('Accept-Ranges', 'Content-Encoding', 'Transfer-Encoding', 'ETag', 'Content-Length'): for header in ('Accept-Ranges', 'Content-Encoding', 'Transfer-Encoding', 'ETag', 'Content-Length'):
outheaders.pop('header', all=True) outheaders.pop(header, all=True)
matched = '*' in none_match or (output.etag and output.etag in none_match) matched = '*' in none_match or (output.etag and output.etag in none_match)
if matched: if matched:

View File

@ -117,8 +117,8 @@ class ReadBuffer(object): # {{{
class Connection(object): # {{{ class Connection(object): # {{{
def __init__(self, socket, opts, ssl_context, tdir, addr, pool, log, wakeup): def __init__(self, socket, opts, ssl_context, tdir, addr, pool, log, access_log, wakeup):
self.opts, self.pool, self.log, self.wakeup = opts, pool, log, wakeup self.opts, self.pool, self.log, self.wakeup, self.access_log = opts, pool, log, wakeup, access_log
try: try:
self.remote_addr = addr[0] self.remote_addr = addr[0]
self.remote_port = addr[1] self.remote_port = addr[1]
@ -304,12 +304,16 @@ class ServerLoop(object):
plugins=(), plugins=(),
# A calibre logging object. If None, a default log that logs to # A calibre logging object. If None, a default log that logs to
# stdout is used # stdout is used
log=None log=None,
# A calibre logging object for access logging, by default no access
# logging is performed
access_log=None
): ):
self.ready = False self.ready = False
self.handler = handler self.handler = handler
self.opts = opts or Options() self.opts = opts or Options()
self.log = log or ThreadSafeLog(level=ThreadSafeLog.DEBUG) self.log = log or ThreadSafeLog(level=ThreadSafeLog.DEBUG)
self.access_log = access_log
ba = (self.opts.listen_on, int(self.opts.port)) ba = (self.opts.listen_on, int(self.opts.port))
if not ba[0]: if not ba[0]:
@ -558,7 +562,8 @@ class ServerLoop(object):
if sock is not None: if sock is not None:
s = sock.fileno() s = sock.fileno()
if s > -1: if s > -1:
self.connection_map[s] = conn = self.handler(sock, self.opts, self.ssl_context, self.tdir, addr, self.pool, self.log, self.wakeup) self.connection_map[s] = conn = self.handler(
sock, self.opts, self.ssl_context, self.tdir, addr, self.pool, self.log, self.access_log, self.wakeup)
if self.ssl_context is not None: if self.ssl_context is not None:
yield s, conn, RDWR yield s, conn, RDWR
elif s == control: elif s == control:

View File

@ -103,6 +103,12 @@ raw_options = (
'The maximum size of log files, generated by the server. When the log becomes larger' 'The maximum size of log files, generated by the server. When the log becomes larger'
' than this size, it is automatically rotated.', ' than this size, it is automatically rotated.',
'Enable/disable logging of not found http requests',
'log_not_found', True,
'By default, the server logs all HTTP requests for resources that are not found.'
' Tis can generate a lot of log spam, if your server is targeted by bots.'
' Use this option to turn it off.',
'Enable/disable password based authentication to access the server', 'Enable/disable password based authentication to access the server',
'auth', False, 'auth', False,
'By default, the server is unrestricted, allowing anyone to access it. You can' 'By default, the server is unrestricted, allowing anyone to access it. You can'

View File

@ -57,14 +57,16 @@ def daemonize(): # {{{
class Server(object): class Server(object):
def __init__(self, libraries, opts): def __init__(self, libraries, opts):
log = None log = access_log = None
if opts.log: if opts.log:
log = RotatingLog(opts.log, max_size=opts.max_log_size) log = RotatingLog(opts.log, max_size=opts.max_log_size)
if opts.access_log:
access_log = RotatingLog(opts.access_log, max_size=opts.max_log_size)
self.handler = Handler(libraries, opts) self.handler = Handler(libraries, opts)
plugins = [] plugins = []
if opts.use_bonjour: if opts.use_bonjour:
plugins.append(BonJour()) plugins.append(BonJour())
self.loop = ServerLoop(create_http_handler(self.handler.dispatch), opts=opts, log=log, plugins=plugins) self.loop = ServerLoop(create_http_handler(self.handler.dispatch), opts=opts, log=log, access_log=access_log, plugins=plugins)
self.handler.set_log(self.loop.log) self.handler.set_log(self.loop.log)
self.serve_forever = self.loop.serve_forever self.serve_forever = self.loop.serve_forever
self.stop = self.loop.stop self.stop = self.loop.stop
@ -181,7 +183,12 @@ program will be used.
)) ))
parser.add_option( parser.add_option(
'--log', default=None, '--log', default=None,
help=_('Path to log file for server log')) help=_('Path to log file for server log. This log contains server information and errors, not access logs. By default it is written to stdout.'))
parser.add_option(
'--access-log', default=None,
help=_('Path to the access log file. This log contains information'
' about clients connecting to the server and making requests. By'
' default no access logging is done.'))
parser.add_option('--daemonize', default=False, action='store_true', parser.add_option('--daemonize', default=False, action='store_true',
help=_('Run process in background as a daemon. No effect on Windows.')) help=_('Run process in background as a daemon. No effect on Windows.'))
parser.add_option('--pidfile', default=None, parser.add_option('--pidfile', default=None,