From 7f457019fbb3b862d5a564104c79b90dc1297f29 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Feb 2016 14:08:06 +0530 Subject: [PATCH] CS: Implement access logging --- src/calibre/srv/http_request.py | 2 +- src/calibre/srv/http_response.py | 36 ++++++++++++++++++++++++++------ src/calibre/srv/loop.py | 13 ++++++++---- src/calibre/srv/opts.py | 6 ++++++ src/calibre/srv/standalone.py | 13 +++++++++--- 5 files changed, 56 insertions(+), 14 deletions(-) diff --git a/src/calibre/srv/http_request.py b/src/calibre/srv/http_request.py index 913a86dfa3..12e9af6c26 100644 --- a/src/calibre/srv/http_request.py +++ b/src/calibre/srv/http_request.py @@ -246,6 +246,7 @@ class HTTPRequest(Connection): line = self.readline(buf) if line is None: return + self.request_line = line.rstrip() if line == b'\r\n': # Ignore a single leading empty line, as per RFC 2616 sec 4.1 if first: @@ -272,7 +273,6 @@ class HTTPRequest(Connection): except HTTPSimpleResponse as e: return self.simple_response(e.http_code, e.message, close_after_response=False) self.header_line_too_long_error_code = httplib.REQUEST_ENTITY_TOO_LARGE - self.request_line = line.rstrip() self.set_state(READ, self.parse_header_line, HTTPHeaderParser(), Accumulator()) # }}} diff --git a/src/calibre/srv/http_response.py b/src/calibre/srv/http_response.py index e60249a058..093c218ee7 100644 --- a/src/calibre/srv/http_response.py +++ b/src/calibre/srv/http_response.py @@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2015, Kovid Goyal ' -import os, httplib, hashlib, uuid, struct, repr as reprlib +import os, httplib, hashlib, uuid, struct, repr as reprlib, time from collections import namedtuple from io import BytesIO, DEFAULT_BUFFER_SIZE from itertools import chain, repeat, izip_longest @@ -395,7 +395,9 @@ class HTTPConnection(HTTPRequest): buf = [(x + '\r\n').encode('ascii') for x in buf] if self.method != 'HEAD': buf.append(msg) - self.response_ready(ReadOnlyFileBuffer(b''.join(buf))) + response_data = b''.join(buf) + self.log_access(status_code=status_code, response_size=len(response_data)) + self.response_ready(ReadOnlyFileBuffer(response_data)) def prepare_response(self, inheaders, request_body_file): if self.method == 'TRACE': @@ -420,7 +422,9 @@ class HTTPConnection(HTTPRequest): "Date: " + http_date(), "Content-Range: bytes */%d" % content_length, ] - self.response_ready(header_list_to_file(buf)) + response_data = header_list_to_file(buf) + self.log_access(status_code=httplib.REQUESTED_RANGE_NOT_SATISFIABLE, response_size=response_data.sz) + self.response_ready(response_data) def send_not_modified(self, etag=None): buf = [ @@ -430,7 +434,9 @@ class HTTPConnection(HTTPRequest): ] if etag is not None: buf.append('ETag: ' + etag) - self.response_ready(header_list_to_file(buf)) + response_data = header_list_to_file(buf) + self.log_access(status_code=httplib.NOT_MODIFIED, response_size=response_data.sz) + self.response_ready(response_data) def report_busy(self): self.simple_response(httplib.SERVICE_UNAVAILABLE) @@ -482,7 +488,25 @@ class HTTPConnection(HTTPRequest): x = x.decode('ascii') buf.append(x) buf.append('') - self.response_ready(ReadOnlyFileBuffer(b''.join((x + '\r\n').encode('ascii') for x in buf)), output=output) + response_data = ReadOnlyFileBuffer(b''.join((x + '\r\n').encode('ascii') for x in buf)) + if self.access_log is not None: + sz = outheaders.get('Content-Length') + if sz is not None: + sz = int(sz) + response_data.sz + self.log_access(status_code=data.status_code, response_size=sz, username=data.username) + self.response_ready(response_data, output=output) + + def log_access(self, status_code, response_size=None, username=None): + if self.access_log is None: + return + if not self.opts.log_not_found and status_code == httplib.NOT_FOUND: + return + line = '%s port-%s %s %s "%s" %s %s' % ( + self.remote_addr, self.remote_port, username or '-', + time.strftime('%d/%b/%Y:%H:%M:%S %z'), + force_unicode(self.request_line or '', 'utf-8'), + status_code, ('-' if response_size is None else response_size)) + self.access_log(line) def response_ready(self, header_file, output=None): self.response_started = True @@ -612,7 +636,7 @@ class HTTPConnection(HTTPRequest): return self.send_range_not_satisfiable(output.content_length) for header in ('Accept-Ranges', 'Content-Encoding', 'Transfer-Encoding', 'ETag', 'Content-Length'): - outheaders.pop('header', all=True) + outheaders.pop(header, all=True) matched = '*' in none_match or (output.etag and output.etag in none_match) if matched: diff --git a/src/calibre/srv/loop.py b/src/calibre/srv/loop.py index 662a049497..536ed1b1c8 100644 --- a/src/calibre/srv/loop.py +++ b/src/calibre/srv/loop.py @@ -117,8 +117,8 @@ class ReadBuffer(object): # {{{ class Connection(object): # {{{ - def __init__(self, socket, opts, ssl_context, tdir, addr, pool, log, wakeup): - self.opts, self.pool, self.log, self.wakeup = opts, pool, log, wakeup + def __init__(self, socket, opts, ssl_context, tdir, addr, pool, log, access_log, wakeup): + self.opts, self.pool, self.log, self.wakeup, self.access_log = opts, pool, log, wakeup, access_log try: self.remote_addr = addr[0] self.remote_port = addr[1] @@ -304,12 +304,16 @@ class ServerLoop(object): plugins=(), # A calibre logging object. If None, a default log that logs to # stdout is used - log=None + log=None, + # A calibre logging object for access logging, by default no access + # logging is performed + access_log=None ): self.ready = False self.handler = handler self.opts = opts or Options() self.log = log or ThreadSafeLog(level=ThreadSafeLog.DEBUG) + self.access_log = access_log ba = (self.opts.listen_on, int(self.opts.port)) if not ba[0]: @@ -558,7 +562,8 @@ class ServerLoop(object): if sock is not None: s = sock.fileno() if s > -1: - self.connection_map[s] = conn = self.handler(sock, self.opts, self.ssl_context, self.tdir, addr, self.pool, self.log, self.wakeup) + self.connection_map[s] = conn = self.handler( + sock, self.opts, self.ssl_context, self.tdir, addr, self.pool, self.log, self.access_log, self.wakeup) if self.ssl_context is not None: yield s, conn, RDWR elif s == control: diff --git a/src/calibre/srv/opts.py b/src/calibre/srv/opts.py index cc84ff1cb6..a881897d69 100644 --- a/src/calibre/srv/opts.py +++ b/src/calibre/srv/opts.py @@ -103,6 +103,12 @@ raw_options = ( 'The maximum size of log files, generated by the server. When the log becomes larger' ' than this size, it is automatically rotated.', + 'Enable/disable logging of not found http requests', + 'log_not_found', True, + 'By default, the server logs all HTTP requests for resources that are not found.' + ' Tis can generate a lot of log spam, if your server is targeted by bots.' + ' Use this option to turn it off.', + 'Enable/disable password based authentication to access the server', 'auth', False, 'By default, the server is unrestricted, allowing anyone to access it. You can' diff --git a/src/calibre/srv/standalone.py b/src/calibre/srv/standalone.py index 57cb491d7d..554dd9ac4c 100644 --- a/src/calibre/srv/standalone.py +++ b/src/calibre/srv/standalone.py @@ -57,14 +57,16 @@ def daemonize(): # {{{ class Server(object): def __init__(self, libraries, opts): - log = None + log = access_log = None if opts.log: log = RotatingLog(opts.log, max_size=opts.max_log_size) + if opts.access_log: + access_log = RotatingLog(opts.access_log, max_size=opts.max_log_size) self.handler = Handler(libraries, opts) plugins = [] if opts.use_bonjour: plugins.append(BonJour()) - self.loop = ServerLoop(create_http_handler(self.handler.dispatch), opts=opts, log=log, plugins=plugins) + self.loop = ServerLoop(create_http_handler(self.handler.dispatch), opts=opts, log=log, access_log=access_log, plugins=plugins) self.handler.set_log(self.loop.log) self.serve_forever = self.loop.serve_forever self.stop = self.loop.stop @@ -181,7 +183,12 @@ program will be used. )) parser.add_option( '--log', default=None, - help=_('Path to log file for server log')) + help=_('Path to log file for server log. This log contains server information and errors, not access logs. By default it is written to stdout.')) + parser.add_option( + '--access-log', default=None, + help=_('Path to the access log file. This log contains information' + ' about clients connecting to the server and making requests. By' + ' default no access logging is done.')) parser.add_option('--daemonize', default=False, action='store_true', help=_('Run process in background as a daemon. No effect on Windows.')) parser.add_option('--pidfile', default=None,