From 05b2bf22be90471745d6046dba396cc1e9523343 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 26 Dec 2011 12:01:34 +0530 Subject: [PATCH] Content server: Update version of CherryPy used to 3.2.2 --- src/calibre/library/server/base.py | 1 + src/cherrypy/LICENSE.txt | 2 +- src/cherrypy/__init__.py | 291 ++- src/cherrypy/_cpcgifs.py | 79 - src/cherrypy/_cpchecker.py | 114 +- src/cherrypy/_cpcompat.py | 318 +++ src/cherrypy/_cpconfig.py | 408 ++- src/cherrypy/_cpdispatch.py | 246 +- src/cherrypy/_cperror.py | 288 +- src/cherrypy/_cplogging.py | 281 +- src/cherrypy/_cpmodpy.py | 54 +- src/cherrypy/_cpnative_server.py | 149 ++ src/cherrypy/_cpreqbody.py | 965 +++++++ src/cherrypy/_cprequest.py | 501 ++-- src/cherrypy/_cpserver.py | 131 +- src/cherrypy/_cptools.py | 194 +- src/cherrypy/_cptree.py | 108 +- src/cherrypy/_cpwsgi.py | 440 ++-- src/cherrypy/_cpwsgi_server.py | 76 +- src/cherrypy/cherryd | 53 +- src/cherrypy/lib/__init__.py | 167 +- src/cherrypy/lib/auth.py | 58 +- src/cherrypy/lib/auth_basic.py | 87 + src/cherrypy/lib/auth_digest.py | 365 +++ src/cherrypy/lib/caching.py | 416 ++- src/cherrypy/lib/covercp.py | 102 +- src/cherrypy/lib/cpstats.py | 662 +++++ src/cherrypy/lib/cptools.py | 321 ++- src/cherrypy/lib/encoding.py | 499 ++-- src/cherrypy/lib/gctools.py | 214 ++ src/cherrypy/lib/http.py | 408 +-- src/cherrypy/lib/httpauth.py | 45 +- src/cherrypy/lib/httputil.py | 506 ++++ src/cherrypy/lib/jsontools.py | 87 + src/cherrypy/lib/profiler.py | 57 +- src/cherrypy/lib/reprconf.py | 485 ++++ src/cherrypy/lib/safemime.py | 128 - src/cherrypy/lib/sessions.py | 380 ++- src/cherrypy/lib/static.py | 263 +- src/cherrypy/lib/tidy.py | 184 -- src/cherrypy/lib/wsgiapp.py | 77 - src/cherrypy/lib/{xmlrpc.py => xmlrpcutil.py} | 16 +- src/cherrypy/process/plugins.py | 262 +- src/cherrypy/process/servers.py | 197 +- src/cherrypy/process/win32.py | 7 +- src/cherrypy/process/wspbus.py | 157 +- src/cherrypy/scaffold/__init__.py | 2 +- src/cherrypy/scaffold/apache-fcgi.conf | 22 + src/cherrypy/scaffold/site.conf | 8 +- src/cherrypy/wsgiserver/__init__.py | 1802 +------------ src/cherrypy/wsgiserver/ssl_builtin.py | 91 + src/cherrypy/wsgiserver/ssl_pyopenssl.py | 256 ++ src/cherrypy/wsgiserver/wsgiserver2.py | 2322 +++++++++++++++++ src/cherrypy/wsgiserver/wsgiserver3.py | 2040 +++++++++++++++ 54 files changed, 12652 insertions(+), 4740 deletions(-) delete mode 100644 src/cherrypy/_cpcgifs.py create mode 100644 src/cherrypy/_cpcompat.py create mode 100644 src/cherrypy/_cpnative_server.py create mode 100644 src/cherrypy/_cpreqbody.py mode change 100644 => 100755 src/cherrypy/cherryd create mode 100644 src/cherrypy/lib/auth_basic.py create mode 100644 src/cherrypy/lib/auth_digest.py create mode 100644 src/cherrypy/lib/cpstats.py create mode 100644 src/cherrypy/lib/gctools.py create mode 100644 src/cherrypy/lib/httputil.py create mode 100644 src/cherrypy/lib/jsontools.py create mode 100644 src/cherrypy/lib/reprconf.py delete mode 100644 src/cherrypy/lib/safemime.py delete mode 100644 src/cherrypy/lib/tidy.py delete mode 100644 src/cherrypy/lib/wsgiapp.py rename src/cherrypy/lib/{xmlrpc.py => xmlrpcutil.py} (78%) create mode 100644 src/cherrypy/scaffold/apache-fcgi.conf create mode 100644 src/cherrypy/wsgiserver/ssl_builtin.py create mode 100644 src/cherrypy/wsgiserver/ssl_pyopenssl.py create mode 100644 src/cherrypy/wsgiserver/wsgiserver2.py create mode 100644 src/cherrypy/wsgiserver/wsgiserver3.py diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index d065ddc926..c53c1d59f6 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -136,6 +136,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, 'engine.autoreload_on' : getattr(opts, 'auto_reload', False), 'tools.log_headers.on' : opts.develop, + 'tools.encode.encoding' : 'UTF-8', 'checker.on' : opts.develop, 'request.show_tracebacks': show_tracebacks, 'server.socket_host' : listen_on, diff --git a/src/cherrypy/LICENSE.txt b/src/cherrypy/LICENSE.txt index 2504532a18..8db13fb23b 100644 --- a/src/cherrypy/LICENSE.txt +++ b/src/cherrypy/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2004-2007, CherryPy Team (team@cherrypy.org) +Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org) All rights reserved. Redistribution and use in source and binary forms, with or without modification, diff --git a/src/cherrypy/__init__.py b/src/cherrypy/__init__.py index a9a8a93752..41e3898bbf 100644 --- a/src/cherrypy/__init__.py +++ b/src/cherrypy/__init__.py @@ -57,101 +57,10 @@ These API's are described in the CherryPy specification: http://www.cherrypy.org/wiki/CherryPySpec """ -__version__ = "3.1.2" - -from urlparse import urljoin as _urljoin - - -class _AttributeDocstrings(type): - """Metaclass for declaring docstrings for class attributes.""" - # The full docstring for this type is down in the __init__ method so - # that it doesn't show up in help() for every consumer class. - - def __init__(cls, name, bases, dct): - '''Metaclass for declaring docstrings for class attributes. - - Base Python doesn't provide any syntax for setting docstrings on - 'data attributes' (non-callables). This metaclass allows class - definitions to follow the declaration of a data attribute with - a docstring for that attribute; the attribute docstring will be - popped from the class dict and folded into the class docstring. - - The naming convention for attribute docstrings is: - + "__doc". - For example: - - class Thing(object): - """A thing and its properties.""" - - __metaclass__ = cherrypy._AttributeDocstrings - - height = 50 - height__doc = """The height of the Thing in inches.""" - - In which case, help(Thing) starts like this: - - >>> help(mod.Thing) - Help on class Thing in module pkg.mod: - - class Thing(__builtin__.object) - | A thing and its properties. - | - | height [= 50]: - | The height of the Thing in inches. - | - - The benefits of this approach over hand-edited class docstrings: - 1. Places the docstring nearer to the attribute declaration. - 2. Makes attribute docs more uniform ("name (default): doc"). - 3. Reduces mismatches of attribute _names_ between - the declaration and the documentation. - 4. Reduces mismatches of attribute default _values_ between - the declaration and the documentation. - - The benefits of a metaclass approach over other approaches: - 1. Simpler ("less magic") than interface-based solutions. - 2. __metaclass__ can be specified at the module global level - for classic classes. - - For various formatting reasons, you should write multiline docs - with a leading newline and not a trailing one: - - response__doc = """ - The response object for the current thread. In the main thread, - and any threads which are not HTTP requests, this is None.""" - - The type of the attribute is intentionally not included, because - that's not How Python Works. Quack. - ''' - - newdoc = [cls.__doc__ or ""] - - dctnames = dct.keys() - dctnames.sort() - - for name in dctnames: - if name.endswith("__doc"): - # Remove the magic doc attribute. - if hasattr(cls, name): - delattr(cls, name) - - # Make a uniformly-indented docstring from it. - val = '\n'.join([' ' + line.strip() - for line in dct[name].split('\n')]) - - # Get the default value. - attrname = name[:-5] - try: - attrval = getattr(cls, attrname) - except AttributeError: - attrval = "missing" - - # Add the complete attribute docstring to our list. - newdoc.append("%s [= %r]:\n%s" % (attrname, attrval, val)) - - # Add our list of new docstrings to the class docstring. - cls.__doc__ = "\n\n".join(newdoc) +__version__ = "3.2.2" +from cherrypy._cpcompat import urljoin as _urljoin, urlencode as _urlencode +from cherrypy._cpcompat import basestring, unicodestr, set from cherrypy._cperror import HTTPError, HTTPRedirect, InternalRedirect from cherrypy._cperror import NotFound, CherryPyException, TimeoutError @@ -163,7 +72,7 @@ tools = _cptools.default_toolbox Tool = _cptools.Tool from cherrypy import _cprequest -from cherrypy.lib import http as _http +from cherrypy.lib import httputil as _httputil from cherrypy import _cptree tree = _cptree.Tree() @@ -180,17 +89,21 @@ except ImportError: engine = process.bus -# Timeout monitor +# Timeout monitor. We add two channels to the engine +# to which cherrypy.Application will publish. +engine.listeners['before_request'] = set() +engine.listeners['after_request'] = set() + class _TimeoutMonitor(process.plugins.Monitor): def __init__(self, bus): self.servings = [] process.plugins.Monitor.__init__(self, bus, self.run) - def acquire(self): + def before_request(self): self.servings.append((serving.request, serving.response)) - def release(self): + def after_request(self): try: self.servings.remove((serving.request, serving.response)) except ValueError: @@ -237,8 +150,7 @@ def quickstart(root=None, script_name="", config=None): if config: _global_conf_alias.update(config) - if root is not None: - tree.mount(root, script_name, config) + tree.mount(root, script_name, config) if hasattr(engine, "signal_handler"): engine.signal_handler.subscribe() @@ -249,10 +161,7 @@ def quickstart(root=None, script_name="", config=None): engine.block() -try: - from threading import local as _local -except ImportError: - from cherrypy._cpthreadinglocal import local as _local +from cherrypy._cpcompat import threadlocal as _local class _Serving(_local): """An interface for registering request and response objects. @@ -265,16 +174,14 @@ class _Serving(_local): thread-safe way. """ - __metaclass__ = _AttributeDocstrings - - request = _cprequest.Request(_http.Host("127.0.0.1", 80), - _http.Host("127.0.0.1", 1111)) - request__doc = """ + request = _cprequest.Request(_httputil.Host("127.0.0.1", 80), + _httputil.Host("127.0.0.1", 1111)) + """ The request object for the current thread. In the main thread, and any threads which are not receiving HTTP requests, this is None.""" response = _cprequest.Response() - response__doc = """ + """ The response object for the current thread. In the main thread, and any threads which are not receiving HTTP requests, this is None.""" @@ -341,7 +248,8 @@ class _ThreadLocalProxy(object): def __nonzero__(self): child = getattr(serving, self.__attrname__) return bool(child) - + # Python 3 + __bool__ = __nonzero__ # Create request and response object (the same objects will be used # throughout the entire life of the webserver, but will redirect @@ -376,15 +284,26 @@ except ImportError: from cherrypy import _cplogging class _GlobalLogManager(_cplogging.LogManager): + """A site-wide LogManager; routes to app.log or global log as appropriate. + + This :class:`LogManager` implements + cherrypy.log() and cherrypy.log.access(). If either + function is called during a request, the message will be sent to the + logger for the current Application. If they are called outside of a + request, the message will be sent to the site-wide logger. + """ def __call__(self, *args, **kwargs): - try: + """Log the given message to the app.log or global log as appropriate.""" + # Do NOT use try/except here. See http://www.cherrypy.org/ticket/945 + if hasattr(request, 'app') and hasattr(request.app, 'log'): log = request.app.log - except AttributeError: + else: log = self return log.error(*args, **kwargs) def access(self): + """Log an access message to the app.log or global log as appropriate.""" try: return request.app.log.access() except AttributeError: @@ -444,6 +363,138 @@ def expose(func=None, alias=None): alias = func return expose_ +def popargs(*args, **kwargs): + """A decorator for _cp_dispatch + (cherrypy.dispatch.Dispatcher.dispatch_method_name). + + Optional keyword argument: handler=(Object or Function) + + Provides a _cp_dispatch function that pops off path segments into + cherrypy.request.params under the names specified. The dispatch + is then forwarded on to the next vpath element. + + Note that any existing (and exposed) member function of the class that + popargs is applied to will override that value of the argument. For + instance, if you have a method named "list" on the class decorated with + popargs, then accessing "/list" will call that function instead of popping + it off as the requested parameter. This restriction applies to all + _cp_dispatch functions. The only way around this restriction is to create + a "blank class" whose only function is to provide _cp_dispatch. + + If there are path elements after the arguments, or more arguments + are requested than are available in the vpath, then the 'handler' + keyword argument specifies the next object to handle the parameterized + request. If handler is not specified or is None, then self is used. + If handler is a function rather than an instance, then that function + will be called with the args specified and the return value from that + function used as the next object INSTEAD of adding the parameters to + cherrypy.request.args. + + This decorator may be used in one of two ways: + + As a class decorator: + @cherrypy.popargs('year', 'month', 'day') + class Blog: + def index(self, year=None, month=None, day=None): + #Process the parameters here; any url like + #/, /2009, /2009/12, or /2009/12/31 + #will fill in the appropriate parameters. + + def create(self): + #This link will still be available at /create. Defined functions + #take precedence over arguments. + + Or as a member of a class: + class Blog: + _cp_dispatch = cherrypy.popargs('year', 'month', 'day') + #... + + The handler argument may be used to mix arguments with built in functions. + For instance, the following setup allows different activities at the + day, month, and year level: + + class DayHandler: + def index(self, year, month, day): + #Do something with this day; probably list entries + + def delete(self, year, month, day): + #Delete all entries for this day + + @cherrypy.popargs('day', handler=DayHandler()) + class MonthHandler: + def index(self, year, month): + #Do something with this month; probably list entries + + def delete(self, year, month): + #Delete all entries for this month + + @cherrypy.popargs('month', handler=MonthHandler()) + class YearHandler: + def index(self, year): + #Do something with this year + + #... + + @cherrypy.popargs('year', handler=YearHandler()) + class Root: + def index(self): + #... + + """ + + #Since keyword arg comes after *args, we have to process it ourselves + #for lower versions of python. + + handler = None + handler_call = False + for k,v in kwargs.items(): + if k == 'handler': + handler = v + else: + raise TypeError( + "cherrypy.popargs() got an unexpected keyword argument '{0}'" \ + .format(k) + ) + + import inspect + + if handler is not None \ + and (hasattr(handler, '__call__') or inspect.isclass(handler)): + handler_call = True + + def decorated(cls_or_self=None, vpath=None): + if inspect.isclass(cls_or_self): + #cherrypy.popargs is a class decorator + cls = cls_or_self + setattr(cls, dispatch.Dispatcher.dispatch_method_name, decorated) + return cls + + #We're in the actual function + self = cls_or_self + parms = {} + for arg in args: + if not vpath: + break + parms[arg] = vpath.pop(0) + + if handler is not None: + if handler_call: + return handler(**parms) + else: + request.params.update(parms) + return handler + + request.params.update(parms) + + #If we are the ultimate handler, then to prevent our _cp_dispatch + #from being called again, we will resolve remaining elements through + #getattr() directly. + if vpath: + return getattr(self, vpath.pop(0), None) + else: + return self + + return decorated def url(path="", qs="", script_name=None, base=None, relative=None): """Create an absolute URL for the given path. @@ -472,6 +523,8 @@ def url(path="", qs="", script_name=None, base=None, relative=None): the string 'server', the output will instead be a URL that is relative to the server root; i.e., it will start with a slash. """ + if isinstance(qs, (tuple, list, dict)): + qs = _urlencode(qs) if qs: qs = '?' + qs @@ -536,7 +589,7 @@ def url(path="", qs="", script_name=None, base=None, relative=None): elif relative: # "A relative reference that does not begin with a scheme name # or a slash character is termed a relative-path reference." - old = url().split('/')[:-1] + old = url(relative=False).split('/')[:-1] new = newurl.split('/') while old and new: a, b = old[0], new[0] @@ -555,6 +608,16 @@ from cherrypy import _cpconfig # Use _global_conf_alias so quickstart can use 'config' as an arg # without shadowing cherrypy.config. config = _global_conf_alias = _cpconfig.Config() +config.defaults = { + 'tools.log_tracebacks.on': True, + 'tools.log_headers.on': True, + 'tools.trailing_slash.on': True, + 'tools.encode.on': True + } +config.namespaces["log"] = lambda k, v: setattr(log, k, v) +config.namespaces["checker"] = lambda k, v: setattr(checker, k, v) +# Must reset to get our defaults applied. +config.reset() from cherrypy import _cpchecker checker = _cpchecker.Checker() diff --git a/src/cherrypy/_cpcgifs.py b/src/cherrypy/_cpcgifs.py deleted file mode 100644 index 3eb5dd85df..0000000000 --- a/src/cherrypy/_cpcgifs.py +++ /dev/null @@ -1,79 +0,0 @@ -import cgi -import cherrypy - - -class FieldStorage(cgi.FieldStorage): - def __init__(self, *args, **kwds): - try: - cgi.FieldStorage.__init__(self, *args, **kwds) - except ValueError, ex: - if str(ex) == 'Maximum content length exceeded': - raise cherrypy.HTTPError(status=413) - else: - raise ex - - def read_lines_to_eof(self): - """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline(1<<16) - if not line: - self.done = -1 - break - self.__write(line) - - def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary.""" - next = "--" + self.outerboundary - last = next + "--" - delim = "" - last_line_lfend = True - while 1: - line = self.fp.readline(1<<16) - if not line: - self.done = -1 - break - if line[:2] == "--" and last_line_lfend: - strippedline = line.strip() - if strippedline == next: - break - if strippedline == last: - self.done = 1 - break - odelim = delim - if line[-2:] == "\r\n": - delim = "\r\n" - line = line[:-2] - last_line_lfend = True - elif line[-1] == "\n": - delim = "\n" - line = line[:-1] - last_line_lfend = True - else: - delim = "" - last_line_lfend = False - self.__write(odelim + line) - - def skip_lines(self): - """Internal: skip lines until outer boundary if defined.""" - if not self.outerboundary or self.done: - return - next = "--" + self.outerboundary - last = next + "--" - last_line_lfend = True - while 1: - line = self.fp.readline(1<<16) - if not line: - self.done = -1 - break - if line[:2] == "--" and last_line_lfend: - strippedline = line.strip() - if strippedline == next: - break - if strippedline == last: - self.done = 1 - break - if line.endswith('\n'): - last_line_lfend = True - else: - last_line_lfend = False - diff --git a/src/cherrypy/_cpchecker.py b/src/cherrypy/_cpchecker.py index ddf3438ce3..7ccfd89dc2 100644 --- a/src/cherrypy/_cpchecker.py +++ b/src/cherrypy/_cpchecker.py @@ -2,25 +2,26 @@ import os import warnings import cherrypy +from cherrypy._cpcompat import iteritems, copykeys, builtins class Checker(object): """A checker for CherryPy sites and their mounted applications. - on: set this to False to turn off the checker completely. - When this object is called at engine startup, it executes each - of its own methods whose names start with "check_". If you wish + of its own methods whose names start with ``check_``. If you wish to disable selected checks, simply add a line in your global - config which sets the appropriate method to False: + config which sets the appropriate method to False:: - [global] - checker.check_skipped_app_config = False + [global] + checker.check_skipped_app_config = False - You may also dynamically add or replace check_* methods in this way. + You may also dynamically add or replace ``check_*`` methods in this way. """ on = True + """If True (the default), run all checks; if False, turn off all checks.""" + def __init__(self): self._populate_known_types() @@ -34,7 +35,7 @@ class Checker(object): for name in dir(self): if name.startswith("check_"): method = getattr(self, name) - if method and callable(method): + if method and hasattr(method, '__call__'): method() finally: warnings.formatwarning = oldformatwarning @@ -46,8 +47,47 @@ class Checker(object): # This value should be set inside _cpconfig. global_config_contained_paths = False + def check_app_config_entries_dont_start_with_script_name(self): + """Check for Application config with sections that repeat script_name.""" + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + if not app.config: + continue + if sn == '': + continue + sn_atoms = sn.strip("/").split("/") + for key in app.config.keys(): + key_atoms = key.strip("/").split("/") + if key_atoms[:len(sn_atoms)] == sn_atoms: + warnings.warn( + "The application mounted at %r has config " \ + "entries that start with its script name: %r" % (sn, key)) + + def check_site_config_entries_in_app_config(self): + """Check for mounted Applications that have site-scoped config.""" + for sn, app in iteritems(cherrypy.tree.apps): + if not isinstance(app, cherrypy.Application): + continue + + msg = [] + for section, entries in iteritems(app.config): + if section.startswith('/'): + for key, value in iteritems(entries): + for n in ("engine.", "server.", "tree.", "checker."): + if key.startswith(n): + msg.append("[%s] %s = %s" % (section, key, value)) + if msg: + msg.insert(0, + "The application mounted at %r contains the following " + "config entries, which are only allowed in site-wide " + "config. Move them to a [global] section and pass them " + "to cherrypy.config.update() instead of tree.mount()." % sn) + warnings.warn(os.linesep.join(msg)) + def check_skipped_app_config(self): - for sn, app in cherrypy.tree.apps.iteritems(): + """Check for mounted Applications that have no config.""" + for sn, app in cherrypy.tree.apps.items(): if not isinstance(app, cherrypy.Application): continue if not app.config: @@ -61,10 +101,26 @@ class Checker(object): warnings.warn(msg) return + def check_app_config_brackets(self): + """Check for Application config with extraneous brackets in section names.""" + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + if not app.config: + continue + for key in app.config.keys(): + if key.startswith("[") or key.endswith("]"): + warnings.warn( + "The application mounted at %r has config " \ + "section names with extraneous brackets: %r. " + "Config *files* need brackets; config *dicts* " + "(e.g. passed to tree.mount) do not." % (sn, key)) + def check_static_paths(self): + """Check Application config for incorrect static paths.""" # Use the dummy Request object in the main thread. request = cherrypy.request - for sn, app in cherrypy.tree.apps.iteritems(): + for sn, app in cherrypy.tree.apps.items(): if not isinstance(app, cherrypy.Application): continue request.app = app @@ -130,9 +186,9 @@ class Checker(object): def _compat(self, config): """Process config and warn on each obsolete or deprecated entry.""" - for section, conf in config.iteritems(): + for section, conf in config.items(): if isinstance(conf, dict): - for k, v in conf.iteritems(): + for k, v in conf.items(): if k in self.obsolete: warnings.warn("%r is obsolete. Use %r instead.\n" "section: [%s]" % @@ -152,7 +208,7 @@ class Checker(object): def check_compatibility(self): """Process config and warn on each obsolete or deprecated entry.""" self._compat(cherrypy.config) - for sn, app in cherrypy.tree.apps.iteritems(): + for sn, app in cherrypy.tree.apps.items(): if not isinstance(app, cherrypy.Application): continue self._compat(app.config) @@ -164,16 +220,16 @@ class Checker(object): def _known_ns(self, app): ns = ["wsgi"] - ns.extend(app.toolboxes.keys()) - ns.extend(app.namespaces.keys()) - ns.extend(app.request_class.namespaces.keys()) - ns.extend(cherrypy.config.namespaces.keys()) + ns.extend(copykeys(app.toolboxes)) + ns.extend(copykeys(app.namespaces)) + ns.extend(copykeys(app.request_class.namespaces)) + ns.extend(copykeys(cherrypy.config.namespaces)) ns += self.extra_config_namespaces - for section, conf in app.config.iteritems(): + for section, conf in app.config.items(): is_path_section = section.startswith("/") if is_path_section and isinstance(conf, dict): - for k, v in conf.iteritems(): + for k, v in conf.items(): atoms = k.split(".") if len(atoms) > 1: if atoms[0] not in ns: @@ -197,7 +253,7 @@ class Checker(object): def check_config_namespaces(self): """Process config and warn on each unknown config namespace.""" - for sn, app in cherrypy.tree.apps.iteritems(): + for sn, app in cherrypy.tree.apps.items(): if not isinstance(app, cherrypy.Application): continue self._known_ns(app) @@ -210,14 +266,16 @@ class Checker(object): known_config_types = {} def _populate_known_types(self): - import __builtin__ - builtins = [x for x in vars(__builtin__).values() - if type(x) is type(str)] + b = [x for x in vars(builtins).values() + if type(x) is type(str)] def traverse(obj, namespace): for name in dir(obj): + # Hack for 3.2's warning about body_params + if name == 'body_params': + continue vtype = type(getattr(obj, name, None)) - if vtype in builtins: + if vtype in b: self.known_config_types[namespace + "." + name] = vtype traverse(cherrypy.request, "request") @@ -230,9 +288,9 @@ class Checker(object): msg = ("The config entry %r in section %r is of type %r, " "which does not match the expected type %r.") - for section, conf in config.iteritems(): + for section, conf in config.items(): if isinstance(conf, dict): - for k, v in conf.iteritems(): + for k, v in conf.items(): if v is not None: expected_type = self.known_config_types.get(k, None) vtype = type(v) @@ -251,7 +309,7 @@ class Checker(object): def check_config_types(self): """Assert that config values are of the same type as default values.""" self._known_types(cherrypy.config) - for sn, app in cherrypy.tree.apps.iteritems(): + for sn, app in cherrypy.tree.apps.items(): if not isinstance(app, cherrypy.Application): continue self._known_types(app.config) @@ -261,7 +319,7 @@ class Checker(object): def check_localhost(self): """Warn if any socket_host is 'localhost'. See #711.""" - for k, v in cherrypy.config.iteritems(): + for k, v in cherrypy.config.items(): if k == 'server.socket_host' and v == 'localhost': warnings.warn("The use of 'localhost' as a socket host can " "cause problems on newer systems, since 'localhost' can " diff --git a/src/cherrypy/_cpcompat.py b/src/cherrypy/_cpcompat.py new file mode 100644 index 0000000000..ed24c1ab31 --- /dev/null +++ b/src/cherrypy/_cpcompat.py @@ -0,0 +1,318 @@ +"""Compatibility code for using CherryPy with various versions of Python. + +CherryPy 3.2 is compatible with Python versions 2.3+. This module provides a +useful abstraction over the differences between Python versions, sometimes by +preferring a newer idiom, sometimes an older one, and sometimes a custom one. + +In particular, Python 2 uses str and '' for byte strings, while Python 3 +uses str and '' for unicode strings. We will call each of these the 'native +string' type for each version. Because of this major difference, this module +provides new 'bytestr', 'unicodestr', and 'nativestr' attributes, as well as +two functions: 'ntob', which translates native strings (of type 'str') into +byte strings regardless of Python version, and 'ntou', which translates native +strings to unicode strings. This also provides a 'BytesIO' name for dealing +specifically with bytes, and a 'StringIO' name for dealing with native strings. +It also provides a 'base64_decode' function with native strings as input and +output. +""" +import os +import re +import sys + +if sys.version_info >= (3, 0): + py3k = True + bytestr = bytes + unicodestr = str + nativestr = unicodestr + basestring = (bytes, str) + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 3, the native string type is unicode + return n.encode(encoding) + def ntou(n, encoding='ISO-8859-1'): + """Return the given native string as a unicode string with the given encoding.""" + # In Python 3, the native string type is unicode + return n + def tonative(n, encoding='ISO-8859-1'): + """Return the given string as a native string in the given encoding.""" + # In Python 3, the native string type is unicode + if isinstance(n, bytes): + return n.decode(encoding) + return n + # type("") + from io import StringIO + # bytes: + from io import BytesIO as BytesIO +else: + # Python 2 + py3k = False + bytestr = str + unicodestr = unicode + nativestr = bytestr + basestring = basestring + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + def ntou(n, encoding='ISO-8859-1'): + """Return the given native string as a unicode string with the given encoding.""" + # In Python 2, the native string type is bytes. + # First, check for the special encoding 'escape'. The test suite uses this + # to signal that it wants to pass a string with embedded \uXXXX escapes, + # but without having to prefix it with u'' for Python 2, but no prefix + # for Python 3. + if encoding == 'escape': + return unicode( + re.sub(r'\\u([0-9a-zA-Z]{4})', + lambda m: unichr(int(m.group(1), 16)), + n.decode('ISO-8859-1'))) + # Assume it's already in the given encoding, which for ISO-8859-1 is almost + # always what was intended. + return n.decode(encoding) + def tonative(n, encoding='ISO-8859-1'): + """Return the given string as a native string in the given encoding.""" + # In Python 2, the native string type is bytes. + if isinstance(n, unicode): + return n.encode(encoding) + return n + try: + # type("") + from cStringIO import StringIO + except ImportError: + # type("") + from StringIO import StringIO + # bytes: + BytesIO = StringIO + +try: + set = set +except NameError: + from sets import Set as set + +try: + # Python 3.1+ + from base64 import decodebytes as _base64_decodebytes +except ImportError: + # Python 3.0- + # since CherryPy claims compability with Python 2.3, we must use + # the legacy API of base64 + from base64 import decodestring as _base64_decodebytes + +def base64_decode(n, encoding='ISO-8859-1'): + """Return the native string base64-decoded (as a native string).""" + if isinstance(n, unicodestr): + b = n.encode(encoding) + else: + b = n + b = _base64_decodebytes(b) + if nativestr is unicodestr: + return b.decode(encoding) + else: + return b + +try: + # Python 2.5+ + from hashlib import md5 +except ImportError: + from md5 import new as md5 + +try: + # Python 2.5+ + from hashlib import sha1 as sha +except ImportError: + from sha import new as sha + +try: + sorted = sorted +except NameError: + def sorted(i): + i = i[:] + i.sort() + return i + +try: + reversed = reversed +except NameError: + def reversed(x): + i = len(x) + while i > 0: + i -= 1 + yield x[i] + +try: + # Python 3 + from urllib.parse import urljoin, urlencode + from urllib.parse import quote, quote_plus + from urllib.request import unquote, urlopen + from urllib.request import parse_http_list, parse_keqv_list +except ImportError: + # Python 2 + from urlparse import urljoin + from urllib import urlencode, urlopen + from urllib import quote, quote_plus + from urllib import unquote + from urllib2 import parse_http_list, parse_keqv_list + +try: + from threading import local as threadlocal +except ImportError: + from cherrypy._cpthreadinglocal import local as threadlocal + +try: + dict.iteritems + # Python 2 + iteritems = lambda d: d.iteritems() + copyitems = lambda d: d.items() +except AttributeError: + # Python 3 + iteritems = lambda d: d.items() + copyitems = lambda d: list(d.items()) + +try: + dict.iterkeys + # Python 2 + iterkeys = lambda d: d.iterkeys() + copykeys = lambda d: d.keys() +except AttributeError: + # Python 3 + iterkeys = lambda d: d.keys() + copykeys = lambda d: list(d.keys()) + +try: + dict.itervalues + # Python 2 + itervalues = lambda d: d.itervalues() + copyvalues = lambda d: d.values() +except AttributeError: + # Python 3 + itervalues = lambda d: d.values() + copyvalues = lambda d: list(d.values()) + +try: + # Python 3 + import builtins +except ImportError: + # Python 2 + import __builtin__ as builtins + +try: + # Python 2. We have to do it in this order so Python 2 builds + # don't try to import the 'http' module from cherrypy.lib + from Cookie import SimpleCookie, CookieError + from httplib import BadStatusLine, HTTPConnection, HTTPSConnection, IncompleteRead, NotConnected + from BaseHTTPServer import BaseHTTPRequestHandler +except ImportError: + # Python 3 + from http.cookies import SimpleCookie, CookieError + from http.client import BadStatusLine, HTTPConnection, HTTPSConnection, IncompleteRead, NotConnected + from http.server import BaseHTTPRequestHandler + +try: + # Python 2. We have to do it in this order so Python 2 builds + # don't try to import the 'http' module from cherrypy.lib + from httplib import HTTPSConnection +except ImportError: + try: + # Python 3 + from http.client import HTTPSConnection + except ImportError: + # Some platforms which don't have SSL don't expose HTTPSConnection + HTTPSConnection = None + +try: + # Python 2 + xrange = xrange +except NameError: + # Python 3 + xrange = range + +import threading +if hasattr(threading.Thread, "daemon"): + # Python 2.6+ + def get_daemon(t): + return t.daemon + def set_daemon(t, val): + t.daemon = val +else: + def get_daemon(t): + return t.isDaemon() + def set_daemon(t, val): + t.setDaemon(val) + +try: + from email.utils import formatdate + def HTTPDate(timeval=None): + return formatdate(timeval, usegmt=True) +except ImportError: + from rfc822 import formatdate as HTTPDate + +try: + # Python 3 + from urllib.parse import unquote as parse_unquote + def unquote_qs(atom, encoding, errors='strict'): + return parse_unquote(atom.replace('+', ' '), encoding=encoding, errors=errors) +except ImportError: + # Python 2 + from urllib import unquote as parse_unquote + def unquote_qs(atom, encoding, errors='strict'): + return parse_unquote(atom.replace('+', ' ')).decode(encoding, errors) + +try: + # Prefer simplejson, which is usually more advanced than the builtin module. + import simplejson as json + json_decode = json.JSONDecoder().decode + json_encode = json.JSONEncoder().iterencode +except ImportError: + if py3k: + # Python 3.0: json is part of the standard library, + # but outputs unicode. We need bytes. + import json + json_decode = json.JSONDecoder().decode + _json_encode = json.JSONEncoder().iterencode + def json_encode(value): + for chunk in _json_encode(value): + yield chunk.encode('utf8') + elif sys.version_info >= (2, 6): + # Python 2.6: json is part of the standard library + import json + json_decode = json.JSONDecoder().decode + json_encode = json.JSONEncoder().iterencode + else: + json = None + def json_decode(s): + raise ValueError('No JSON library is available') + def json_encode(s): + raise ValueError('No JSON library is available') + +try: + import cPickle as pickle +except ImportError: + # In Python 2, pickle is a Python version. + # In Python 3, pickle is the sped-up C version. + import pickle + +try: + os.urandom(20) + import binascii + def random20(): + return binascii.hexlify(os.urandom(20)).decode('ascii') +except (AttributeError, NotImplementedError): + import random + # os.urandom not available until Python 2.4. Fall back to random.random. + def random20(): + return sha('%s' % random.random()).hexdigest() + +try: + from _thread import get_ident as get_thread_ident +except ImportError: + from thread import get_ident as get_thread_ident + +try: + # Python 3 + next = next +except NameError: + # Python 2 + def next(i): + return i.next() diff --git a/src/cherrypy/_cpconfig.py b/src/cherrypy/_cpconfig.py index adc9911b17..7b4c6a46d9 100644 --- a/src/cherrypy/_cpconfig.py +++ b/src/cherrypy/_cpconfig.py @@ -1,4 +1,5 @@ -"""Configuration system for CherryPy. +""" +Configuration system for CherryPy. Configuration in CherryPy is implemented via dictionaries. Keys are strings which name the mapped value, which may be of any type. @@ -10,17 +11,20 @@ Architecture CherryPy Requests are part of an Application, which runs in a global context, and configuration data may apply to any of those three scopes: - Global: configuration entries which apply everywhere are stored in +Global + Configuration entries which apply everywhere are stored in cherrypy.config. - - Application: entries which apply to each mounted application are stored + +Application + Entries which apply to each mounted application are stored on the Application object itself, as 'app.config'. This is a two-level dict where each key is a path, or "relative URL" (for example, "/" or "/path/to/my/page"), and each value is a config dict. Usually, this data is provided in the call to tree.mount(root(), config=conf), although you may also use app.merge(conf). - - Request: each Request object possesses a single 'Request.config' dict. + +Request + Each Request object possesses a single 'Request.config' dict. Early in the request process, this dict is populated by merging global config entries, Application entries (whose path equals or is a parent of Request.path_info), and any config acquired while looking up the @@ -33,7 +37,7 @@ Declaration Configuration data may be supplied as a Python dictionary, as a filename, or as an open file object. When you supply a filename or file, CherryPy uses Python's builtin ConfigParser; you declare Application config by -writing each path as a section header: +writing each path as a section header:: [/path/to/my/page] request.stream = True @@ -41,8 +45,8 @@ writing each path as a section header: To declare global configuration entries, place them in a [global] section. You may also declare config entries directly on the classes and methods -(page handlers) that make up your CherryPy application via the '_cp_config' -attribute. For example: +(page handlers) that make up your CherryPy application via the ``_cp_config`` +attribute. For example:: class Demo: _cp_config = {'tools.gzip.on': True} @@ -52,9 +56,11 @@ attribute. For example: index.exposed = True index._cp_config = {'request.show_tracebacks': False} -Note, however, that this behavior is only guaranteed for the default -dispatcher. Other dispatchers may have different restrictions on where -you can attach _cp_config attributes. +.. note:: + + This behavior is only guaranteed for the default dispatcher. + Other dispatchers may have different restrictions on where + you can attach _cp_config attributes. Namespaces @@ -63,23 +69,42 @@ Namespaces Configuration keys are separated into namespaces by the first "." in the key. Current namespaces: - engine: Controls the 'application engine', including autoreload. - These can only be declared in the global config. - tree: Grafts cherrypy.Application objects onto cherrypy.tree. - These can only be declared in the global config. - hooks: Declares additional request-processing functions. - log: Configures the logging for each application. - These can only be declared in the global or / config. - request: Adds attributes to each Request. - response: Adds attributes to each Response. - server: Controls the default HTTP server via cherrypy.server. - These can only be declared in the global config. - tools: Runs and configures additional request-processing packages. - wsgi: Adds WSGI middleware to an Application's "pipeline". - These can only be declared in the app's root config ("/"). - checker: Controls the 'checker', which looks for common errors in - app state (including config) when the engine starts. - Global config only. +engine + Controls the 'application engine', including autoreload. + These can only be declared in the global config. + +tree + Grafts cherrypy.Application objects onto cherrypy.tree. + These can only be declared in the global config. + +hooks + Declares additional request-processing functions. + +log + Configures the logging for each application. + These can only be declared in the global or / config. + +request + Adds attributes to each Request. + +response + Adds attributes to each Response. + +server + Controls the default HTTP server via cherrypy.server. + These can only be declared in the global config. + +tools + Runs and configures additional request-processing packages. + +wsgi + Adds WSGI middleware to an Application's "pipeline". + These can only be declared in the app's root config ("/"). + +checker + Controls the 'checker', which looks for common errors in + app state (including config) when the engine starts. + Global config only. The only key that does not exist in a namespace is the "environment" entry. This special entry 'imports' other config entries from a template stored in @@ -93,56 +118,12 @@ be any string, and the handler must be either a callable or a (Python 2.5 style) context manager. """ -import ConfigParser -try: - set -except NameError: - from sets import Set as set -import sys - import cherrypy +from cherrypy._cpcompat import set, basestring +from cherrypy.lib import reprconf - -environments = { - "staging": { - 'engine.autoreload_on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': False, - }, - "production": { - 'engine.autoreload_on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': False, - 'log.screen': False, - }, - "embedded": { - # For use with CherryPy embedded in another deployment stack. - 'engine.autoreload_on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': False, - 'log.screen': False, - 'engine.SIGHUP': None, - 'engine.SIGTERM': None, - }, - "test_suite": { - 'engine.autoreload_on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': True, - 'log.screen': False, - }, - } - -def as_dict(config): - """Return a dict from 'config' whether it is a dict, file, or filename.""" - if isinstance(config, basestring): - config = _Parser().dict_from_file(config) - elif hasattr(config, 'read'): - config = _Parser().dict_from_file(config) - return config +# Deprecated in CherryPy 3.2--remove in 3.3 +NamespaceSet = reprconf.NamespaceSet def merge(base, other): """Merge one app config (from a dict, file, or filename) into another. @@ -154,142 +135,116 @@ def merge(base, other): cherrypy.engine.autoreload.files.add(other) # Load other into base - for section, value_map in as_dict(other).iteritems(): + for section, value_map in reprconf.as_dict(other).items(): + if not isinstance(value_map, dict): + raise ValueError( + "Application config must include section headers, but the " + "config you tried to merge doesn't have any sections. " + "Wrap your config in another dict with paths as section " + "headers, for example: {'/': config}.") base.setdefault(section, {}).update(value_map) -class NamespaceSet(dict): - """A dict of config namespace names and handlers. - - Each config entry should begin with a namespace name; the corresponding - namespace handler will be called once for each config entry in that - namespace, and will be passed two arguments: the config key (with the - namespace removed) and the config value. - - Namespace handlers may be any Python callable; they may also be - Python 2.5-style 'context managers', in which case their __enter__ - method should return a callable to be used as the handler. - See cherrypy.tools (the Toolbox class) for an example. - """ - - def __call__(self, config): - """Iterate through config and pass it to each namespace handler. - - 'config' should be a flat dict, where keys use dots to separate - namespaces, and values are arbitrary. - - The first name in each config key is used to look up the corresponding - namespace handler. For example, a config entry of {'tools.gzip.on': v} - will call the 'tools' namespace handler with the args: ('gzip.on', v) - """ - # Separate the given config into namespaces - ns_confs = {} - for k in config: - if "." in k: - ns, name = k.split(".", 1) - bucket = ns_confs.setdefault(ns, {}) - bucket[name] = config[k] - - # I chose __enter__ and __exit__ so someday this could be - # rewritten using Python 2.5's 'with' statement: - # for ns, handler in self.iteritems(): - # with handler as callable: - # for k, v in ns_confs.get(ns, {}).iteritems(): - # callable(k, v) - for ns, handler in self.iteritems(): - exit = getattr(handler, "__exit__", None) - if exit: - callable = handler.__enter__() - no_exc = True - try: - try: - for k, v in ns_confs.get(ns, {}).iteritems(): - callable(k, v) - except: - # The exceptional case is handled here - no_exc = False - if exit is None: - raise - if not exit(*sys.exc_info()): - raise - # The exception is swallowed if exit() returns true - finally: - # The normal and non-local-goto cases are handled here - if no_exc and exit: - exit(None, None, None) - else: - for k, v in ns_confs.get(ns, {}).iteritems(): - handler(k, v) - - def __repr__(self): - return "%s.%s(%s)" % (self.__module__, self.__class__.__name__, - dict.__repr__(self)) - - def __copy__(self): - newobj = self.__class__() - newobj.update(self) - return newobj - copy = __copy__ - - -class Config(dict): +class Config(reprconf.Config): """The 'global' configuration data for the entire CherryPy process.""" - - defaults = { - 'tools.log_tracebacks.on': True, - 'tools.log_headers.on': True, - 'tools.trailing_slash.on': True, - } - - namespaces = NamespaceSet( - **{"server": lambda k, v: setattr(cherrypy.server, k, v), - "log": lambda k, v: setattr(cherrypy.log, k, v), - "checker": lambda k, v: setattr(cherrypy.checker, k, v), - }) - - def __init__(self): - self.reset() - - def reset(self): - """Reset self to default values.""" - self.clear() - dict.update(self, self.defaults) - + def update(self, config): """Update self from a dict, file or filename.""" if isinstance(config, basestring): # Filename cherrypy.engine.autoreload.files.add(config) - config = _Parser().dict_from_file(config) - elif hasattr(config, 'read'): - # Open file object - config = _Parser().dict_from_file(config) - else: - config = config.copy() - + reprconf.Config.update(self, config) + + def _apply(self, config): + """Update self from a dict.""" if isinstance(config.get("global", None), dict): if len(config) > 1: cherrypy.checker.global_config_contained_paths = True config = config["global"] - - which_env = config.get('environment') - if which_env: - env = environments[which_env] - for k in env: - if k not in config: - config[k] = env[k] - if 'tools.staticdir.dir' in config: config['tools.staticdir.section'] = "global" - - dict.update(self, config) - self.namespaces(config) + reprconf.Config._apply(self, config) - def __setitem__(self, k, v): - dict.__setitem__(self, k, v) - self.namespaces({k: v}) + def __call__(self, *args, **kwargs): + """Decorator for page handlers to set _cp_config.""" + if args: + raise TypeError( + "The cherrypy.config decorator does not accept positional " + "arguments; you must use keyword arguments.") + def tool_decorator(f): + if not hasattr(f, "_cp_config"): + f._cp_config = {} + for k, v in kwargs.items(): + f._cp_config[k] = v + return f + return tool_decorator +Config.environments = environments = { + "staging": { + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'request.show_mismatched_params': False, + }, + "production": { + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'request.show_mismatched_params': False, + 'log.screen': False, + }, + "embedded": { + # For use with CherryPy embedded in another deployment stack. + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'request.show_mismatched_params': False, + 'log.screen': False, + 'engine.SIGHUP': None, + 'engine.SIGTERM': None, + }, + "test_suite": { + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': True, + 'request.show_mismatched_params': True, + 'log.screen': False, + }, + } + + +def _server_namespace_handler(k, v): + """Config handler for the "server" namespace.""" + atoms = k.split(".", 1) + if len(atoms) > 1: + # Special-case config keys of the form 'server.servername.socket_port' + # to configure additional HTTP servers. + if not hasattr(cherrypy, "servers"): + cherrypy.servers = {} + + servername, k = atoms + if servername not in cherrypy.servers: + from cherrypy import _cpserver + cherrypy.servers[servername] = _cpserver.Server() + # On by default, but 'on = False' can unsubscribe it (see below). + cherrypy.servers[servername].subscribe() + + if k == 'on': + if v: + cherrypy.servers[servername].subscribe() + else: + cherrypy.servers[servername].unsubscribe() + else: + setattr(cherrypy.servers[servername], k, v) + else: + setattr(cherrypy.server, k, v) +Config.namespaces["server"] = _server_namespace_handler + def _engine_namespace_handler(k, v): """Backward compatibility handler for the "engine" namespace.""" engine = cherrypy.engine @@ -314,10 +269,10 @@ def _engine_namespace_handler(k, v): plugin, attrname = k.split(".", 1) plugin = getattr(engine, plugin) if attrname == 'on': - if v and callable(getattr(plugin, 'subscribe', None)): + if v and hasattr(getattr(plugin, 'subscribe', None), '__call__'): plugin.subscribe() return - elif (not v) and callable(getattr(plugin, 'unsubscribe', None)): + elif (not v) and hasattr(getattr(plugin, 'unsubscribe', None), '__call__'): plugin.unsubscribe() return setattr(plugin, attrname, v) @@ -328,58 +283,13 @@ Config.namespaces["engine"] = _engine_namespace_handler def _tree_namespace_handler(k, v): """Namespace handler for the 'tree' config namespace.""" - cherrypy.tree.graft(v, v.script_name) - cherrypy.engine.log("Mounted: %s on %s" % (v, v.script_name or "/")) + if isinstance(v, dict): + for script_name, app in v.items(): + cherrypy.tree.graft(app, script_name) + cherrypy.engine.log("Mounted: %s on %s" % (app, script_name or "/")) + else: + cherrypy.tree.graft(v, v.script_name) + cherrypy.engine.log("Mounted: %s on %s" % (v, v.script_name or "/")) Config.namespaces["tree"] = _tree_namespace_handler -class _Parser(ConfigParser.ConfigParser): - """Sub-class of ConfigParser that keeps the case of options and that raises - an exception if the file cannot be read. - """ - - def optionxform(self, optionstr): - return optionstr - - def read(self, filenames): - if isinstance(filenames, basestring): - filenames = [filenames] - for filename in filenames: - # try: - # fp = open(filename) - # except IOError: - # continue - fp = open(filename) - try: - self._read(fp, filename) - finally: - fp.close() - - def as_dict(self, raw=False, vars=None): - """Convert an INI file to a dictionary""" - # Load INI file into a dict - from cherrypy.lib import unrepr - result = {} - for section in self.sections(): - if section not in result: - result[section] = {} - for option in self.options(section): - value = self.get(section, option, raw, vars) - try: - value = unrepr(value) - except Exception, x: - msg = ("Config error in section: %r, option: %r, " - "value: %r. Config values must be valid Python." % - (section, option, value)) - raise ValueError(msg, x.__class__.__name__, x.args) - result[section][option] = value - return result - - def dict_from_file(self, file): - if hasattr(file, 'read'): - self.readfp(file) - else: - self.read(file) - return self.as_dict() - -del ConfigParser diff --git a/src/cherrypy/_cpdispatch.py b/src/cherrypy/_cpdispatch.py index c29fb05bbe..d614e08693 100644 --- a/src/cherrypy/_cpdispatch.py +++ b/src/cherrypy/_cpdispatch.py @@ -9,7 +9,16 @@ The default dispatcher discovers the page handler by matching path_info to a hierarchical arrangement of objects, starting at request.app.root. """ +import string +import sys +import types +try: + classtype = (type, types.ClassType) +except AttributeError: + classtype = type + import cherrypy +from cherrypy._cpcompat import set class PageHandler(object): @@ -23,10 +32,17 @@ class PageHandler(object): def __call__(self): try: return self.callable(*self.args, **self.kwargs) - except TypeError, x: - test_callable_spec(self.callable, self.args, self.kwargs) + except TypeError: + x = sys.exc_info()[1] + try: + test_callable_spec(self.callable, self.args, self.kwargs) + except cherrypy.HTTPError: + raise sys.exc_info()[1] + except: + raise x raise + def test_callable_spec(callable, callable_args, callable_kwargs): """ Inspect callable and test to see if the given args are suitable for it. @@ -46,7 +62,17 @@ def test_callable_spec(callable, callable_args, callable_kwargs): incorrect, then a 404 Not Found should be raised. Conversely the body parameters are part of the request; if they are invalid a 400 Bad Request. """ - (args, varargs, varkw, defaults) = inspect.getargspec(callable) + show_mismatched_params = getattr( + cherrypy.serving.request, 'show_mismatched_params', False) + try: + (args, varargs, varkw, defaults) = inspect.getargspec(callable) + except TypeError: + if isinstance(callable, object) and hasattr(callable, '__call__'): + (args, varargs, varkw, defaults) = inspect.getargspec(callable.__call__) + else: + # If it wasn't one of our own types, re-raise + # the original error + raise if args and args[0] == 'self': args = args[1:] @@ -69,14 +95,16 @@ def test_callable_spec(callable, callable_args, callable_kwargs): varkw_usage += 1 extra_kwargs.add(key) + # figure out which args have defaults. + args_with_defaults = args[-len(defaults or []):] for i, val in enumerate(defaults or []): # Defaults take effect only when the arg hasn't been used yet. - if arg_usage[args[i]] == 0: - arg_usage[args[i]] += 1 + if arg_usage[args_with_defaults[i]] == 0: + arg_usage[args_with_defaults[i]] += 1 missing_args = [] multiple_args = [] - for key, usage in arg_usage.iteritems(): + for key, usage in arg_usage.items(): if usage == 0: missing_args.append(key) elif usage > 1: @@ -95,19 +123,20 @@ def test_callable_spec(callable, callable_args, callable_kwargs): # # In the case where the method does not allow body # arguments it's definitely a 404. - raise cherrypy.HTTPError(404, - message="Missing parameters: %s" % ",".join(missing_args)) + message = None + if show_mismatched_params: + message="Missing parameters: %s" % ",".join(missing_args) + raise cherrypy.HTTPError(404, message=message) # the extra positional arguments come from the path - 404 Not Found if not varargs and vararg_usage > 0: raise cherrypy.HTTPError(404) - body_params = cherrypy.request.body_params or {} + body_params = cherrypy.serving.request.body.params or {} body_params = set(body_params.keys()) qs_params = set(callable_kwargs.keys()) - body_params if multiple_args: - if qs_params.intersection(set(multiple_args)): # If any of the multiple parameters came from the query string then # it's a 404 Not Found @@ -116,25 +145,31 @@ def test_callable_spec(callable, callable_args, callable_kwargs): # Otherwise it's a 400 Bad Request error = 400 - raise cherrypy.HTTPError(error, - message="Multiple values for parameters: "\ - "%s" % ",".join(multiple_args)) + message = None + if show_mismatched_params: + message="Multiple values for parameters: "\ + "%s" % ",".join(multiple_args) + raise cherrypy.HTTPError(error, message=message) if not varkw and varkw_usage > 0: # If there were extra query string parameters, it's a 404 Not Found extra_qs_params = set(qs_params).intersection(extra_kwargs) if extra_qs_params: - raise cherrypy.HTTPError(404, + message = None + if show_mismatched_params: message="Unexpected query string "\ - "parameters: %s" % ", ".join(extra_qs_params)) + "parameters: %s" % ", ".join(extra_qs_params) + raise cherrypy.HTTPError(404, message=message) # If there were any extra body parameters, it's a 400 Not Found extra_body_params = set(body_params).intersection(extra_kwargs) if extra_body_params: - raise cherrypy.HTTPError(400, + message = None + if show_mismatched_params: message="Unexpected body parameters: "\ - "%s" % ", ".join(extra_body_params)) + "%s" % ", ".join(extra_body_params) + raise cherrypy.HTTPError(400, message=message) try: @@ -154,7 +189,7 @@ class LateParamPageHandler(PageHandler): """ def _get_kwargs(self): - kwargs = cherrypy.request.params.copy() + kwargs = cherrypy.serving.request.params.copy() if self._kwargs: kwargs.update(self._kwargs) return kwargs @@ -167,6 +202,19 @@ class LateParamPageHandler(PageHandler): 'cherrypy.request.params copied in)') +if sys.version_info < (3, 0): + punctuation_to_underscores = string.maketrans( + string.punctuation, '_' * len(string.punctuation)) + def validate_translator(t): + if not isinstance(t, str) or len(t) != 256: + raise ValueError("The translate argument must be a str of len 256.") +else: + punctuation_to_underscores = str.maketrans( + string.punctuation, '_' * len(string.punctuation)) + def validate_translator(t): + if not isinstance(t, dict): + raise ValueError("The translate argument must be a dict.") + class Dispatcher(object): """CherryPy Dispatcher which walks a tree of objects to find a handler. @@ -181,9 +229,22 @@ class Dispatcher(object): This is the default, built-in dispatcher for CherryPy. """ + dispatch_method_name = '_cp_dispatch' + """ + The name of the dispatch method that nodes may optionally implement + to provide their own dynamic dispatch algorithm. + """ + + def __init__(self, dispatch_method_name=None, + translate=punctuation_to_underscores): + validate_translator(translate) + self.translate = translate + if dispatch_method_name: + self.dispatch_method_name = dispatch_method_name + def __call__(self, path_info): """Set handler and config for the current request.""" - request = cherrypy.request + request = cherrypy.serving.request func, vpath = self.find_handler(path_info) if func: @@ -212,55 +273,101 @@ class Dispatcher(object): These virtual path components are passed to the handler as positional arguments. """ - request = cherrypy.request + request = cherrypy.serving.request app = request.app root = app.root + dispatch_name = self.dispatch_method_name # Get config for the root object/path. - curpath = "" + fullpath = [x for x in path.strip('/').split('/') if x] + ['index'] + fullpath_len = len(fullpath) + segleft = fullpath_len nodeconf = {} if hasattr(root, "_cp_config"): nodeconf.update(root._cp_config) if "/" in app.config: nodeconf.update(app.config["/"]) - object_trail = [['root', root, nodeconf, curpath]] + object_trail = [['root', root, nodeconf, segleft]] node = root - names = [x for x in path.strip('/').split('/') if x] + ['index'] - for name in names: - # map to legal Python identifiers (replace '.' with '_') - objname = name.replace('.', '_') + iternames = fullpath[:] + while iternames: + name = iternames[0] + # map to legal Python identifiers (e.g. replace '.' with '_') + objname = name.translate(self.translate) nodeconf = {} - node = getattr(node, objname, None) + subnode = getattr(node, objname, None) + pre_len = len(iternames) + if subnode is None: + dispatch = getattr(node, dispatch_name, None) + if dispatch and hasattr(dispatch, '__call__') and not \ + getattr(dispatch, 'exposed', False) and \ + pre_len > 1: + #Don't expose the hidden 'index' token to _cp_dispatch + #We skip this if pre_len == 1 since it makes no sense + #to call a dispatcher when we have no tokens left. + index_name = iternames.pop() + subnode = dispatch(vpath=iternames) + iternames.append(index_name) + else: + #We didn't find a path, but keep processing in case there + #is a default() handler. + iternames.pop(0) + else: + #We found the path, remove the vpath entry + iternames.pop(0) + segleft = len(iternames) + if segleft > pre_len: + #No path segment was removed. Raise an error. + raise cherrypy.CherryPyException( + "A vpath segment was added. Custom dispatchers may only " + + "remove elements. While trying to process " + + "{0} in {1}".format(name, fullpath) + ) + elif segleft == pre_len: + #Assume that the handler used the current path segment, but + #did not pop it. This allows things like + #return getattr(self, vpath[0], None) + iternames.pop(0) + segleft -= 1 + node = subnode + if node is not None: # Get _cp_config attached to this node. if hasattr(node, "_cp_config"): nodeconf.update(node._cp_config) # Mix in values from app.config for this path. - curpath = "/".join((curpath, name)) - if curpath in app.config: - nodeconf.update(app.config[curpath]) + existing_len = fullpath_len - pre_len + if existing_len != 0: + curpath = '/' + '/'.join(fullpath[0:existing_len]) + else: + curpath = '' + new_segs = fullpath[fullpath_len - pre_len:fullpath_len - segleft] + for seg in new_segs: + curpath += '/' + seg + if curpath in app.config: + nodeconf.update(app.config[curpath]) + + object_trail.append([name, node, nodeconf, segleft]) - object_trail.append([name, node, nodeconf, curpath]) - def set_conf(): """Collapse all object_trail config into cherrypy.request.config.""" base = cherrypy.config.copy() # Note that we merge the config from each node # even if that node was None. - for name, obj, conf, curpath in object_trail: + for name, obj, conf, segleft in object_trail: base.update(conf) if 'tools.staticdir.dir' in conf: - base['tools.staticdir.section'] = curpath + base['tools.staticdir.section'] = '/' + '/'.join(fullpath[0:fullpath_len - segleft]) return base # Try successive objects (reverse order) num_candidates = len(object_trail) - 1 - for i in xrange(num_candidates, -1, -1): + for i in range(num_candidates, -1, -1): - name, candidate, nodeconf, curpath = object_trail[i] + name, candidate, nodeconf, segleft = object_trail[i] if candidate is None: continue @@ -270,11 +377,11 @@ class Dispatcher(object): if getattr(defhandler, 'exposed', False): # Insert any extra _cp_config from the default handler. conf = getattr(defhandler, "_cp_config", {}) - object_trail.insert(i+1, ["default", defhandler, conf, curpath]) + object_trail.insert(i+1, ["default", defhandler, conf, segleft]) request.config = set_conf() # See http://www.cherrypy.org/ticket/613 request.is_index = path.endswith("/") - return defhandler, names[i:-1] + return defhandler, fullpath[fullpath_len - segleft:-1] # Uncomment the next line to restrict positional params to "default". # if i < num_candidates - 2: continue @@ -292,7 +399,7 @@ class Dispatcher(object): # Note that this also includes handlers which take # positional parameters (virtual paths). request.is_index = False - return candidate, names[i:-1] + return candidate, fullpath[fullpath_len - segleft:-1] # We didn't find anything request.config = set_conf() @@ -312,7 +419,7 @@ class MethodDispatcher(Dispatcher): def __call__(self, path_info): """Set handler and config for the current request.""" - request = cherrypy.request + request = cherrypy.serving.request resource, vpath = self.find_handler(path_info) if resource: @@ -321,7 +428,7 @@ class MethodDispatcher(Dispatcher): if "GET" in avail and "HEAD" not in avail: avail.append("HEAD") avail.sort() - cherrypy.response.headers['Allow'] = ", ".join(avail) + cherrypy.serving.response.headers['Allow'] = ", ".join(avail) # Find the subhandler meth = request.method.upper() @@ -329,6 +436,10 @@ class MethodDispatcher(Dispatcher): if func is None and meth == "HEAD": func = getattr(resource, "GET", None) if func: + # Grab any _cp_config on the subhandler. + if hasattr(func, "_cp_config"): + request.config.update(func._cp_config) + # Decode any leftover %2F in the virtual_path atoms. vpath = [x.replace("%2F", "/") for x in vpath] request.handler = LateParamPageHandler(func, *vpath) @@ -366,20 +477,20 @@ class RoutesDispatcher(object): """Set handler and config for the current request.""" func = self.find_handler(path_info) if func: - cherrypy.request.handler = LateParamPageHandler(func) + cherrypy.serving.request.handler = LateParamPageHandler(func) else: - cherrypy.request.handler = cherrypy.NotFound() + cherrypy.serving.request.handler = cherrypy.NotFound() def find_handler(self, path_info): """Find the right page handler, and set request.config.""" import routes - request = cherrypy.request + request = cherrypy.serving.request config = routes.request_config() config.mapper = self.mapper - if hasattr(cherrypy.request, 'wsgi_environ'): - config.environ = cherrypy.request.wsgi_environ + if hasattr(request, 'wsgi_environ'): + config.environ = request.wsgi_environ config.host = request.headers.get('Host', None) config.protocol = request.scheme config.redirect = self.redirect @@ -424,20 +535,24 @@ class RoutesDispatcher(object): handler = None if result: - controller = result.get('controller', None) - controller = self.controllers.get(controller) + controller = result.get('controller') + controller = self.controllers.get(controller, controller) if controller: + if isinstance(controller, classtype): + controller = controller() # Get config from the controller. if hasattr(controller, "_cp_config"): merge(controller._cp_config) - action = result.get('action', None) + action = result.get('action') if action is not None: handler = getattr(controller, action, None) # Get config from the handler if hasattr(handler, "_cp_config"): merge(handler._cp_config) - + else: + handler = controller + # Do the last path atom here so it can # override the controller's _cp_config. if last: @@ -449,25 +564,26 @@ class RoutesDispatcher(object): def XMLRPCDispatcher(next_dispatcher=Dispatcher()): - from cherrypy.lib import xmlrpc + from cherrypy.lib import xmlrpcutil def xmlrpc_dispatch(path_info): - path_info = xmlrpc.patched_path(path_info) + path_info = xmlrpcutil.patched_path(path_info) return next_dispatcher(path_info) return xmlrpc_dispatch def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, **domains): - """Select a different handler based on the Host header. + """ + Select a different handler based on the Host header. This can be useful when running multiple sites within one CP server. It allows several domains to point to different parts of a single - website structure. For example: + website structure. For example:: http://www.domain.example -> root http://www.domain2.example -> root/domain2/ http://www.domain2.example:443 -> root/secure - can be accomplished via the following config: + can be accomplished via the following config:: [/] request.dispatch = cherrypy.dispatch.VirtualHost( @@ -475,15 +591,18 @@ def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, **domai 'www.domain2.example:443': '/secure', }) - next_dispatcher: the next dispatcher object in the dispatch chain. + next_dispatcher + The next dispatcher object in the dispatch chain. The VirtualHost dispatcher adds a prefix to the URL and calls another dispatcher. Defaults to cherrypy.dispatch.Dispatcher(). - use_x_forwarded_host: if True (the default), any "X-Forwarded-Host" + use_x_forwarded_host + If True (the default), any "X-Forwarded-Host" request header will be used instead of the "Host" header. This is commonly added by HTTP servers (such as Apache) when proxying. - **domains: a dict of {host header value: virtual prefix} pairs. + ``**domains`` + A dict of {host header value: virtual prefix} pairs. The incoming "Host" request header is looked up in this dict, and, if a match is found, the corresponding "virtual prefix" value will be prepended to the URL path before calling the @@ -491,9 +610,10 @@ def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, **domai for "example.com" and "www.example.com". In addition, "Host" headers may contain the port number. """ - from cherrypy.lib import http + from cherrypy.lib import httputil def vhost_dispatch(path_info): - header = cherrypy.request.headers.get + request = cherrypy.serving.request + header = request.headers.get domain = header('Host', '') if use_x_forwarded_host: @@ -501,15 +621,15 @@ def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, **domai prefix = domains.get(domain, "") if prefix: - path_info = http.urljoin(prefix, path_info) + path_info = httputil.urljoin(prefix, path_info) result = next_dispatcher(path_info) # Touch up staticdir config. See http://www.cherrypy.org/ticket/614. - section = cherrypy.request.config.get('tools.staticdir.section') + section = request.config.get('tools.staticdir.section') if section: section = section[len(prefix):] - cherrypy.request.config['tools.staticdir.section'] = section + request.config['tools.staticdir.section'] = section return result return vhost_dispatch diff --git a/src/cherrypy/_cperror.py b/src/cherrypy/_cperror.py index d3d0f229c3..76a409ff6b 100644 --- a/src/cherrypy/_cperror.py +++ b/src/cherrypy/_cperror.py @@ -1,13 +1,118 @@ -"""Error classes for CherryPy.""" +"""Exception classes for CherryPy. + +CherryPy provides (and uses) exceptions for declaring that the HTTP response +should be a status other than the default "200 OK". You can ``raise`` them like +normal Python exceptions. You can also call them and they will raise themselves; +this means you can set an :class:`HTTPError` +or :class:`HTTPRedirect` as the +:attr:`request.handler`. + +.. _redirectingpost: + +Redirecting POST +================ + +When you GET a resource and are redirected by the server to another Location, +there's generally no problem since GET is both a "safe method" (there should +be no side-effects) and an "idempotent method" (multiple calls are no different +than a single call). + +POST, however, is neither safe nor idempotent--if you +charge a credit card, you don't want to be charged twice by a redirect! + +For this reason, *none* of the 3xx responses permit a user-agent (browser) to +resubmit a POST on redirection without first confirming the action with the user: + +===== ================================= =========== +300 Multiple Choices Confirm with the user +301 Moved Permanently Confirm with the user +302 Found (Object moved temporarily) Confirm with the user +303 See Other GET the new URI--no confirmation +304 Not modified (for conditional GET only--POST should not raise this error) +305 Use Proxy Confirm with the user +307 Temporary Redirect Confirm with the user +===== ================================= =========== + +However, browsers have historically implemented these restrictions poorly; +in particular, many browsers do not force the user to confirm 301, 302 +or 307 when redirecting POST. For this reason, CherryPy defaults to 303, +which most user-agents appear to have implemented correctly. Therefore, if +you raise HTTPRedirect for a POST request, the user-agent will most likely +attempt to GET the new URI (without asking for confirmation from the user). +We realize this is confusing for developers, but it's the safest thing we +could do. You are of course free to raise ``HTTPRedirect(uri, status=302)`` +or any other 3xx status if you know what you're doing, but given the +environment, we couldn't let any of those be the default. + +Custom Error Handling +===================== + +.. image:: /refman/cperrors.gif + +Anticipated HTTP responses +-------------------------- + +The 'error_page' config namespace can be used to provide custom HTML output for +expected responses (like 404 Not Found). Supply a filename from which the output +will be read. The contents will be interpolated with the values %(status)s, +%(message)s, %(traceback)s, and %(version)s using plain old Python +`string formatting `_. + +:: + + _cp_config = {'error_page.404': os.path.join(localDir, "static/index.html")} + + +Beginning in version 3.1, you may also provide a function or other callable as +an error_page entry. It will be passed the same status, message, traceback and +version arguments that are interpolated into templates:: + + def error_page_402(status, message, traceback, version): + return "Error %s - Well, I'm very sorry but you haven't paid!" % status + cherrypy.config.update({'error_page.402': error_page_402}) + +Also in 3.1, in addition to the numbered error codes, you may also supply +"error_page.default" to handle all codes which do not have their own error_page entry. + + + +Unanticipated errors +-------------------- + +CherryPy also has a generic error handling mechanism: whenever an unanticipated +error occurs in your code, it will call +:func:`Request.error_response` to set +the response status, headers, and body. By default, this is the same output as +:class:`HTTPError(500) `. If you want to provide +some other behavior, you generally replace "request.error_response". + +Here is some sample code that shows how to display a custom error message and +send an e-mail containing the error:: + + from cherrypy import _cperror + + def handle_error(): + cherrypy.response.status = 500 + cherrypy.response.body = ["Sorry, an error occured"] + sendMail('error@domain.com', 'Error in your web app', _cperror.format_exc()) + + class Root: + _cp_config = {'request.error_response': handle_error} + + +Note that you have to explicitly set :attr:`response.body ` +and not simply return an error message as a result. +""" from cgi import escape as _escape from sys import exc_info as _exc_info from traceback import format_exception as _format_exception -from urlparse import urljoin as _urljoin -from cherrypy.lib import http as _http +from cherrypy._cpcompat import basestring, bytestr, iteritems, ntob, tonative, urljoin as _urljoin +from cherrypy.lib import httputil as _httputil class CherryPyException(Exception): + """A base class for CherryPy exceptions.""" pass @@ -19,14 +124,16 @@ class TimeoutError(CherryPyException): class InternalRedirect(CherryPyException): """Exception raised to switch to the handler for a different URL. - Any request.params must be supplied in a query string. + This exception will redirect processing to another path within the site + (without informing the client). Provide the new path as an argument when + raising the exception. Provide any params in the querystring for the new URL. """ - def __init__(self, path): + def __init__(self, path, query_string=""): import cherrypy - request = cherrypy.request + self.request = cherrypy.serving.request - self.query_string = "" + self.query_string = query_string if "?" in path: # Separate any params included in the path path, self.query_string = path.split("?", 1) @@ -35,7 +142,7 @@ class InternalRedirect(CherryPyException): # 1. a URL relative to root (e.g. "/dummy") # 2. a URL relative to the current path # Note that any query string will be discarded. - path = _urljoin(request.path_info, path) + path = _urljoin(self.request.path_info, path) # Set a 'path' member attribute so that code which traps this # error can have access to it. @@ -47,21 +154,48 @@ class InternalRedirect(CherryPyException): class HTTPRedirect(CherryPyException): """Exception raised when the request should be redirected. + This exception will force a HTTP redirect to the URL or URL's you give it. The new URL must be passed as the first argument to the Exception, - e.g., HTTPRedirect(newUrl). Multiple URLs are allowed. If a URL is - absolute, it will be used as-is. If it is relative, it is assumed - to be relative to the current cherrypy.request.path_info. + e.g., HTTPRedirect(newUrl). Multiple URLs are allowed in a list. + If a URL is absolute, it will be used as-is. If it is relative, it is + assumed to be relative to the current cherrypy.request.path_info. + + If one of the provided URL is a unicode object, it will be encoded + using the default encoding or the one passed in parameter. + + There are multiple types of redirect, from which you can select via the + ``status`` argument. If you do not provide a ``status`` arg, it defaults to + 303 (or 302 if responding with HTTP/1.0). + + Examples:: + + raise cherrypy.HTTPRedirect("") + raise cherrypy.HTTPRedirect("/abs/path", 307) + raise cherrypy.HTTPRedirect(["path1", "path2?a=1&b=2"], 301) + + See :ref:`redirectingpost` for additional caveats. """ - def __init__(self, urls, status=None): + status = None + """The integer HTTP status code to emit.""" + + urls = None + """The list of URL's to emit.""" + + encoding = 'utf-8' + """The encoding when passed urls are not native strings""" + + def __init__(self, urls, status=None, encoding=None): import cherrypy - request = cherrypy.request + request = cherrypy.serving.request if isinstance(urls, basestring): urls = [urls] abs_urls = [] for url in urls: + url = tonative(url, encoding or self.encoding) + # Note that urljoin will "do the right thing" whether url is: # 1. a complete URL with host (e.g. "http://www.example.com/test") # 2. a URL relative to root (e.g. "/dummy") @@ -73,7 +207,7 @@ class HTTPRedirect(CherryPyException): # RFC 2616 indicates a 301 response code fits our goal; however, # browser support for 301 is quite messy. Do 302/303 instead. See - # http://ppewww.ph.gla.ac.uk/~flavell/www/post-redirect.html + # http://www.alanflavell.org.uk/www/post-redirect.html if status is None: if request.protocol >= (1, 1): status = 303 @@ -94,11 +228,11 @@ class HTTPRedirect(CherryPyException): HTTPRedirect object and set its output without *raising* the exception. """ import cherrypy - response = cherrypy.response + response = cherrypy.serving.response response.status = status = self.status if status in (300, 301, 302, 303, 307): - response.headers['Content-Type'] = "text/html" + response.headers['Content-Type'] = "text/html;charset=utf-8" # "The ... URI SHOULD be given by the Location field # in the response." response.headers['Location'] = self.urls[0] @@ -112,7 +246,8 @@ class HTTPRedirect(CherryPyException): 303: "This resource can be found at %s.", 307: "This resource has moved temporarily to %s.", }[status] - response.body = "
\n".join([msg % (u, u) for u in self.urls]) + msgs = [msg % (u, u) for u in self.urls] + response.body = ntob("
\n".join(msgs), 'utf-8') # Previous code may have set C-L, so we have to reset it # (allow finalize to set it). response.headers.pop('Content-Length', None) @@ -153,7 +288,7 @@ def clean_headers(status): """Remove any headers which should not apply to an error response.""" import cherrypy - response = cherrypy.response + response = cherrypy.serving.response # Remove headers which applied to the original content, # but do not apply to the error page. @@ -161,7 +296,7 @@ def clean_headers(status): for key in ["Accept-Ranges", "Age", "ETag", "Location", "Retry-After", "Vary", "Content-Encoding", "Content-Length", "Expires", "Content-Location", "Content-MD5", "Last-Modified"]: - if respheaders.has_key(key): + if key in respheaders: del respheaders[key] if status != 416: @@ -171,25 +306,49 @@ def clean_headers(status): # specifies the current length of the selected resource. # A response with status code 206 (Partial Content) MUST NOT # include a Content-Range field with a byte-range- resp-spec of "*". - if respheaders.has_key("Content-Range"): + if "Content-Range" in respheaders: del respheaders["Content-Range"] class HTTPError(CherryPyException): - """ Exception used to return an HTTP error code (4xx-5xx) to the client. - This exception will automatically set the response status and body. + """Exception used to return an HTTP error code (4xx-5xx) to the client. + + This exception can be used to automatically send a response using a http status + code, with an appropriate error page. It takes an optional + ``status`` argument (which must be between 400 and 599); it defaults to 500 + ("Internal Server Error"). It also takes an optional ``message`` argument, + which will be returned in the response body. See + `RFC 2616 `_ + for a complete list of available error codes and when to use them. + + Examples:: - A custom message (a long description to display in the browser) - can be provided in place of the default. + raise cherrypy.HTTPError(403) + raise cherrypy.HTTPError("403 Forbidden", "You are not allowed to access this resource.") """ + status = None + """The HTTP status code. May be of type int or str (with a Reason-Phrase).""" + + code = None + """The integer HTTP status code.""" + + reason = None + """The HTTP Reason-Phrase string.""" + def __init__(self, status=500, message=None): - self.status = status = int(status) - if status < 400 or status > 599: + self.status = status + try: + self.code, self.reason, defaultmsg = _httputil.valid_status(status) + except ValueError: + raise self.__class__(500, _exc_info()[1].args[0]) + + if self.code < 400 or self.code > 599: raise ValueError("status must be between 400 and 599.") + # See http://www.python.org/dev/peps/pep-0352/ # self.message = message - self._message = message + self._message = message or defaultmsg CherryPyException.__init__(self, status, message) def set_response(self): @@ -200,24 +359,24 @@ class HTTPError(CherryPyException): """ import cherrypy - response = cherrypy.response + response = cherrypy.serving.response - clean_headers(self.status) + clean_headers(self.code) # In all cases, finalize will be called after this method, # so don't bother cleaning up response values here. response.status = self.status tb = None - if cherrypy.request.show_tracebacks: + if cherrypy.serving.request.show_tracebacks: tb = format_exc() - response.headers['Content-Type'] = "text/html" + response.headers['Content-Type'] = "text/html;charset=utf-8" + response.headers.pop('Content-Length', None) - content = self.get_error_page(self.status, traceback=tb, - message=self._message) + content = ntob(self.get_error_page(self.status, traceback=tb, + message=self._message), 'utf-8') response.body = content - response.headers['Content-Length'] = len(content) - _be_ie_unfriendly(self.status) + _be_ie_unfriendly(self.code) def get_error_page(self, *args, **kwargs): return get_error_page(*args, **kwargs) @@ -228,14 +387,19 @@ class HTTPError(CherryPyException): class NotFound(HTTPError): - """Exception raised when a URL could not be mapped to any handler (404).""" + """Exception raised when a URL could not be mapped to any handler (404). + + This is equivalent to raising + :class:`HTTPError("404 Not Found") `. + """ def __init__(self, path=None): if path is None: import cherrypy - path = cherrypy.request.script_name + cherrypy.request.path_info + request = cherrypy.serving.request + path = request.script_name + request.path_info self.args = (path,) - HTTPError.__init__(self, 404, "The path %r was not found." % path) + HTTPError.__init__(self, 404, "The path '%s' was not found." % path) _HTTPErrorTemplate = '''`, it does so +behind the scenes so that simple logging is simple, but complicated logging +is still possible. "Simple" logging means that you can log to the screen +(i.e. console/stdout) or to a file, and that you can easily have separate +error and access log files. + +Here are the simplified logging settings. You use these by adding lines to +your config file or dict. You should set these at either the global level or +per application (see next), but generally not both. + + * ``log.screen``: Set this to True to have both "error" and "access" messages + printed to stdout. + * ``log.access_file``: Set this to an absolute filename where you want + "access" messages written. + * ``log.error_file``: Set this to an absolute filename where you want "error" + messages written. + +Many events are automatically logged; to log your own application events, call +:func:`cherrypy.log`. + +Architecture +============ + +Separate scopes +--------------- + +CherryPy provides log managers at both the global and application layers. +This means you can have one set of logging rules for your entire site, +and another set of rules specific to each application. The global log +manager is found at :func:`cherrypy.log`, and the log manager for each +application is found at :attr:`app.log`. +If you're inside a request, the latter is reachable from +``cherrypy.request.app.log``; if you're outside a request, you'll have to obtain +a reference to the ``app``: either the return value of +:func:`tree.mount()` or, if you used +:func:`quickstart()` instead, via ``cherrypy.tree.apps['/']``. + +By default, the global logs are named "cherrypy.error" and "cherrypy.access", +and the application logs are named "cherrypy.error.2378745" and +"cherrypy.access.2378745" (the number is the id of the Application object). +This means that the application logs "bubble up" to the site logs, so if your +application has no log handlers, the site-level handlers will still log the +messages. + +Errors vs. Access +----------------- + +Each log manager handles both "access" messages (one per HTTP request) and +"error" messages (everything else). Note that the "error" log is not just for +errors! The format of access messages is highly formalized, but the error log +isn't--it receives messages from a variety of sources (including full error +tracebacks, if enabled). + + +Custom Handlers +=============== + +The simple settings above work by manipulating Python's standard :mod:`logging` +module. So when you need something more complex, the full power of the standard +module is yours to exploit. You can borrow or create custom handlers, formats, +filters, and much more. Here's an example that skips the standard FileHandler +and uses a RotatingFileHandler instead: + +:: + + #python + log = app.log + + # Remove the default FileHandlers if present. + log.error_file = "" + log.access_file = "" + + maxBytes = getattr(log, "rot_maxBytes", 10000000) + backupCount = getattr(log, "rot_backupCount", 1000) + + # Make a new RotatingFileHandler for the error log. + fname = getattr(log, "rot_error_file", "error.log") + h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount) + h.setLevel(DEBUG) + h.setFormatter(_cplogging.logfmt) + log.error_log.addHandler(h) + + # Make a new RotatingFileHandler for the access log. + fname = getattr(log, "rot_access_file", "access.log") + h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount) + h.setLevel(DEBUG) + h.setFormatter(_cplogging.logfmt) + log.access_log.addHandler(h) + + +The ``rot_*`` attributes are pulled straight from the application log object. +Since "log.*" config entries simply set attributes on the log object, you can +add custom attributes to your heart's content. Note that these handlers are +used ''instead'' of the default, simple handlers outlined above (so don't set +the "log.error_file" config entry, for example). +""" import datetime import logging @@ -6,20 +105,59 @@ import logging logging.Logger.manager.emittedNoHandlerWarning = 1 logfmt = logging.Formatter("%(message)s") import os -import rfc822 import sys import cherrypy from cherrypy import _cperror +from cherrypy._cpcompat import ntob, py3k + + +class NullHandler(logging.Handler): + """A no-op logging handler to silence the logging.lastResort handler.""" + + def handle(self, record): + pass + + def emit(self, record): + pass + + def createLock(self): + self.lock = None class LogManager(object): + """An object to assist both simple and advanced logging. + + ``cherrypy.log`` is an instance of this class. + """ appid = None + """The id() of the Application object which owns this log manager. If this + is a global log manager, appid is None.""" + error_log = None + """The actual :class:`logging.Logger` instance for error messages.""" + access_log = None - access_log_format = \ - '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"' + """The actual :class:`logging.Logger` instance for access messages.""" + + if py3k: + access_log_format = \ + '{h} {l} {u} {t} "{r}" {s} {b} "{f}" "{a}"' + else: + access_log_format = \ + '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"' + + logger_root = None + """The "top-level" logger name. + + This string will be used as the first segment in the Logger names. + The default is "cherrypy", for example, in which case the Logger names + will be of the form:: + + cherrypy.error. + cherrypy.access. + """ def __init__(self, appid=None, logger_root="cherrypy"): self.logger_root = logger_root @@ -30,10 +168,15 @@ class LogManager(object): else: self.error_log = logging.getLogger("%s.error.%s" % (logger_root, appid)) self.access_log = logging.getLogger("%s.access.%s" % (logger_root, appid)) - self.error_log.setLevel(logging.DEBUG) + self.error_log.setLevel(logging.INFO) self.access_log.setLevel(logging.INFO) + + # Silence the no-handlers "warning" (stderr write!) in stdlib logging + self.error_log.addHandler(NullHandler()) + self.access_log.addHandler(NullHandler()) + cherrypy.engine.subscribe('graceful', self.reopen_files) - + def reopen_files(self): """Close and reopen all file handlers.""" for log in (self.error_log, self.access_log): @@ -45,26 +188,32 @@ class LogManager(object): h.release() def error(self, msg='', context='', severity=logging.INFO, traceback=False): - """Write to the error log. + """Write the given ``msg`` to the error log. This is not just for errors! Applications may call this at any time to log application-specific information. + + If ``traceback`` is True, the traceback of the current exception + (if any) will be appended to ``msg``. """ if traceback: msg += _cperror.format_exc() self.error_log.log(severity, ' '.join((self.time(), context, msg))) def __call__(self, *args, **kwargs): - """Write to the error log. - - This is not just for errors! Applications may call this at any time - to log application-specific information. - """ + """An alias for ``error``.""" return self.error(*args, **kwargs) def access(self): """Write to the access log (in Apache/NCSA Combined Log format). + See http://httpd.apache.org/docs/2.0/logs.html#combined for format + details. + + CherryPy calls this automatically for you. Note there are no arguments; + it collects the data itself from + :class:`cherrypy.request`. + Like Apache started doing in 2.0.46, non-printable and other special characters in %r (and we expand that to all parts) are escaped using \\xhh sequences, where hh stands for the hexadecimal representation @@ -72,42 +221,72 @@ class LogManager(object): escaped by prepending a backslash, and all whitespace characters, which are written in their C-style notation (\\n, \\t, etc). """ - request = cherrypy.request + request = cherrypy.serving.request remote = request.remote - response = cherrypy.response + response = cherrypy.serving.response outheaders = response.headers inheaders = request.headers + if response.output_status is None: + status = "-" + else: + status = response.output_status.split(ntob(" "), 1)[0] + if py3k: + status = status.decode('ISO-8859-1') atoms = {'h': remote.name or remote.ip, 'l': '-', 'u': getattr(request, "login", None) or "-", 't': self.time(), 'r': request.request_line, - 's': response.status.split(" ", 1)[0], - 'b': outheaders.get('Content-Length', '') or "-", - 'f': inheaders.get('Referer', ''), - 'a': inheaders.get('User-Agent', ''), + 's': status, + 'b': dict.get(outheaders, 'Content-Length', '') or "-", + 'f': dict.get(inheaders, 'Referer', ''), + 'a': dict.get(inheaders, 'User-Agent', ''), } - for k, v in atoms.items(): - if isinstance(v, unicode): - v = v.encode('utf8') - elif not isinstance(v, str): - v = str(v) - # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc - # and backslash for us. All we have to do is strip the quotes. - v = repr(v)[1:-1] - # Escape double-quote. - atoms[k] = v.replace('"', '\\"') - - try: - self.access_log.log(logging.INFO, self.access_log_format % atoms) - except: - self(traceback=True) + if py3k: + for k, v in atoms.items(): + if not isinstance(v, str): + v = str(v) + v = v.replace('"', '\\"').encode('utf8') + # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc + # and backslash for us. All we have to do is strip the quotes. + v = repr(v)[2:-1] + + # in python 3.0 the repr of bytes (as returned by encode) + # uses double \'s. But then the logger escapes them yet, again + # resulting in quadruple slashes. Remove the extra one here. + v = v.replace('\\\\', '\\') + + # Escape double-quote. + atoms[k] = v + + try: + self.access_log.log(logging.INFO, self.access_log_format.format(**atoms)) + except: + self(traceback=True) + else: + for k, v in atoms.items(): + if isinstance(v, unicode): + v = v.encode('utf8') + elif not isinstance(v, str): + v = str(v) + # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc + # and backslash for us. All we have to do is strip the quotes. + v = repr(v)[1:-1] + # Escape double-quote. + atoms[k] = v.replace('"', '\\"') + + try: + self.access_log.log(logging.INFO, self.access_log_format % atoms) + except: + self(traceback=True) def time(self): """Return now() in Apache Common Log Format (no timezone).""" now = datetime.datetime.now() - month = rfc822._monthnames[now.month - 1].capitalize() + monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', + 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] + month = monthnames[now.month - 1].capitalize() return ('[%02d/%s/%04d:%02d:%02d:%02d]' % (now.day, month, now.year, now.hour, now.minute, now.second)) @@ -139,10 +318,13 @@ class LogManager(object): def _set_screen(self, newvalue): self._set_screen_handler(self.error_log, newvalue, stream=sys.stderr) - self._set_screen_handler(self.access_log, newvalue) + self._set_screen_handler(self.access_log, newvalue, stream=sys.stdout) screen = property(_get_screen, _set_screen, - doc="If True, error and access will print to stderr.") - + doc="""Turn stderr/stdout logging on or off. + + If you set this to True, it'll add the appropriate StreamHandler for + you. If you set it to False, it will remove the handler. + """) # -------------------------- File handlers -------------------------- # @@ -175,7 +357,11 @@ class LogManager(object): def _set_error_file(self, newvalue): self._set_file_handler(self.error_log, newvalue) error_file = property(_get_error_file, _set_error_file, - doc="The filename for self.error_log.") + doc="""The filename for self.error_log. + + If you set this to a string, it'll add the appropriate FileHandler for + you. If you set it to ``None`` or ``''``, it will remove the handler. + """) def _get_access_file(self): h = self._get_builtin_handler(self.access_log, "file") @@ -185,8 +371,11 @@ class LogManager(object): def _set_access_file(self, newvalue): self._set_file_handler(self.access_log, newvalue) access_file = property(_get_access_file, _set_access_file, - doc="The filename for self.access_log.") - + doc="""The filename for self.access_log. + + If you set this to a string, it'll add the appropriate FileHandler for + you. If you set it to ``None`` or ``''``, it will remove the handler. + """) # ------------------------- WSGI handlers ------------------------- # @@ -207,7 +396,13 @@ class LogManager(object): def _set_wsgi(self, newvalue): self._set_wsgi_handler(self.error_log, newvalue) wsgi = property(_get_wsgi, _set_wsgi, - doc="If True, error messages will be sent to wsgi.errors.") + doc="""Write errors to wsgi.errors. + + If you set this to True, it'll add the appropriate + :class:`WSGIErrorHandler` for you + (which writes errors to ``wsgi.errors``). + If you set it to False, it will remove the handler. + """) class WSGIErrorHandler(logging.Handler): @@ -216,7 +411,7 @@ class WSGIErrorHandler(logging.Handler): def flush(self): """Flushes the stream.""" try: - stream = cherrypy.request.wsgi_environ.get('wsgi.errors') + stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors') except (AttributeError, KeyError): pass else: @@ -225,7 +420,7 @@ class WSGIErrorHandler(logging.Handler): def emit(self, record): """Emit a record.""" try: - stream = cherrypy.request.wsgi_environ.get('wsgi.errors') + stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors') except (AttributeError, KeyError): pass else: diff --git a/src/cherrypy/_cpmodpy.py b/src/cherrypy/_cpmodpy.py index 991362a16e..76ef6ead68 100644 --- a/src/cherrypy/_cpmodpy.py +++ b/src/cherrypy/_cpmodpy.py @@ -56,12 +56,12 @@ Then restart apache2 and access http://127.0.0.1:8080 """ import logging -import StringIO +import sys import cherrypy +from cherrypy._cpcompat import BytesIO, copyitems, ntob from cherrypy._cperror import format_exc, bare_error -from cherrypy.lib import http - +from cherrypy.lib import httputil # ------------------------------ Request-handling @@ -71,17 +71,18 @@ from cherrypy.lib import http def setup(req): from mod_python import apache - # Run any setup function defined by a "PythonOption cherrypy.setup" directive. + # Run any setup functions defined by a "PythonOption cherrypy.setup" directive. options = req.get_options() if 'cherrypy.setup' in options: - atoms = options['cherrypy.setup'].split('::', 1) - if len(atoms) == 1: - mod = __import__(atoms[0], globals(), locals()) - else: - modname, fname = atoms - mod = __import__(modname, globals(), locals(), [fname]) - func = getattr(mod, fname) - func() + for function in options['cherrypy.setup'].split(): + atoms = function.split('::', 1) + if len(atoms) == 1: + mod = __import__(atoms[0], globals(), locals()) + else: + modname, fname = atoms + mod = __import__(modname, globals(), locals(), [fname]) + func = getattr(mod, fname) + func() cherrypy.config.update({'log.screen': False, "tools.ignore_headers.on": True, @@ -141,9 +142,9 @@ def handler(req): # Obtain a Request object from CherryPy local = req.connection.local_addr - local = http.Host(local[0], local[1], req.connection.local_host or "") + local = httputil.Host(local[0], local[1], req.connection.local_host or "") remote = req.connection.remote_addr - remote = http.Host(remote[0], remote[1], req.connection.remote_host or "") + remote = httputil.Host(remote[0], remote[1], req.connection.remote_host or "") scheme = req.parsed_uri[0] or 'http' req.get_basic_auth_pw() @@ -183,7 +184,7 @@ def handler(req): path = req.uri qs = req.args or "" reqproto = req.protocol - headers = req.headers_in.items() + headers = copyitems(req.headers_in) rfile = _ReadOnlyRequest(req) prev = None @@ -202,7 +203,8 @@ def handler(req): try: request.run(method, path, qs, reqproto, headers, rfile) break - except cherrypy.InternalRedirect, ir: + except cherrypy.InternalRedirect: + ir = sys.exc_info()[1] app.release_serving() prev = request @@ -220,9 +222,9 @@ def handler(req): method = "GET" path = ir.path qs = ir.query_string - rfile = StringIO.StringIO() + rfile = BytesIO() - send_response(req, response.status, response.header_list, + send_response(req, response.output_status, response.header_list, response.body, response.stream) finally: app.release_serving() @@ -264,13 +266,25 @@ def send_response(req, status, headers, body, stream=False): import os import re +try: + import subprocess + def popen(fullcmd): + p = subprocess.Popen(fullcmd, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + close_fds=True) + return p.stdout +except ImportError: + def popen(fullcmd): + pipein, pipeout = os.popen4(fullcmd) + return pipeout def read_process(cmd, args=""): - pipein, pipeout = os.popen4("%s %s" % (cmd, args)) + fullcmd = "%s %s" % (cmd, args) + pipeout = popen(fullcmd) try: firstline = pipeout.readline() - if (re.search(r"(not recognized|No such file|not found)", firstline, + if (re.search(ntob("(not recognized|No such file|not found)"), firstline, re.IGNORECASE)): raise IOError('%s must be on your system path.' % cmd) output = firstline + pipeout.read() diff --git a/src/cherrypy/_cpnative_server.py b/src/cherrypy/_cpnative_server.py new file mode 100644 index 0000000000..57f715a930 --- /dev/null +++ b/src/cherrypy/_cpnative_server.py @@ -0,0 +1,149 @@ +"""Native adapter for serving CherryPy via its builtin server.""" + +import logging +import sys + +import cherrypy +from cherrypy._cpcompat import BytesIO +from cherrypy._cperror import format_exc, bare_error +from cherrypy.lib import httputil +from cherrypy import wsgiserver + + +class NativeGateway(wsgiserver.Gateway): + + recursive = False + + def respond(self): + req = self.req + try: + # Obtain a Request object from CherryPy + local = req.server.bind_addr + local = httputil.Host(local[0], local[1], "") + remote = req.conn.remote_addr, req.conn.remote_port + remote = httputil.Host(remote[0], remote[1], "") + + scheme = req.scheme + sn = cherrypy.tree.script_name(req.uri or "/") + if sn is None: + self.send_response('404 Not Found', [], ['']) + else: + app = cherrypy.tree.apps[sn] + method = req.method + path = req.path + qs = req.qs or "" + headers = req.inheaders.items() + rfile = req.rfile + prev = None + + try: + redirections = [] + while True: + request, response = app.get_serving( + local, remote, scheme, "HTTP/1.1") + request.multithread = True + request.multiprocess = False + request.app = app + request.prev = prev + + # Run the CherryPy Request object and obtain the response + try: + request.run(method, path, qs, req.request_protocol, headers, rfile) + break + except cherrypy.InternalRedirect: + ir = sys.exc_info()[1] + app.release_serving() + prev = request + + if not self.recursive: + if ir.path in redirections: + raise RuntimeError("InternalRedirector visited the " + "same URL twice: %r" % ir.path) + else: + # Add the *previous* path_info + qs to redirections. + if qs: + qs = "?" + qs + redirections.append(sn + path + qs) + + # Munge environment and try again. + method = "GET" + path = ir.path + qs = ir.query_string + rfile = BytesIO() + + self.send_response( + response.output_status, response.header_list, + response.body) + finally: + app.release_serving() + except: + tb = format_exc() + #print tb + cherrypy.log(tb, 'NATIVE_ADAPTER', severity=logging.ERROR) + s, h, b = bare_error() + self.send_response(s, h, b) + + def send_response(self, status, headers, body): + req = self.req + + # Set response status + req.status = str(status or "500 Server Error") + + # Set response headers + for header, value in headers: + req.outheaders.append((header, value)) + if (req.ready and not req.sent_headers): + req.sent_headers = True + req.send_headers() + + # Set response body + for seg in body: + req.write(seg) + + +class CPHTTPServer(wsgiserver.HTTPServer): + """Wrapper for wsgiserver.HTTPServer. + + wsgiserver has been designed to not reference CherryPy in any way, + so that it can be used in other frameworks and applications. + Therefore, we wrap it here, so we can apply some attributes + from config -> cherrypy.server -> HTTPServer. + """ + + def __init__(self, server_adapter=cherrypy.server): + self.server_adapter = server_adapter + + server_name = (self.server_adapter.socket_host or + self.server_adapter.socket_file or + None) + + wsgiserver.HTTPServer.__init__( + self, server_adapter.bind_addr, NativeGateway, + minthreads=server_adapter.thread_pool, + maxthreads=server_adapter.thread_pool_max, + server_name=server_name) + + self.max_request_header_size = self.server_adapter.max_request_header_size or 0 + self.max_request_body_size = self.server_adapter.max_request_body_size or 0 + self.request_queue_size = self.server_adapter.socket_queue_size + self.timeout = self.server_adapter.socket_timeout + self.shutdown_timeout = self.server_adapter.shutdown_timeout + self.protocol = self.server_adapter.protocol_version + self.nodelay = self.server_adapter.nodelay + + ssl_module = self.server_adapter.ssl_module or 'pyopenssl' + if self.server_adapter.ssl_context: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) + self.ssl_adapter.context = self.server_adapter.ssl_context + elif self.server_adapter.ssl_certificate: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) + + diff --git a/src/cherrypy/_cpreqbody.py b/src/cherrypy/_cpreqbody.py new file mode 100644 index 0000000000..5d72c854b3 --- /dev/null +++ b/src/cherrypy/_cpreqbody.py @@ -0,0 +1,965 @@ +"""Request body processing for CherryPy. + +.. versionadded:: 3.2 + +Application authors have complete control over the parsing of HTTP request +entities. In short, :attr:`cherrypy.request.body` +is now always set to an instance of :class:`RequestBody`, +and *that* class is a subclass of :class:`Entity`. + +When an HTTP request includes an entity body, it is often desirable to +provide that information to applications in a form other than the raw bytes. +Different content types demand different approaches. Examples: + + * For a GIF file, we want the raw bytes in a stream. + * An HTML form is better parsed into its component fields, and each text field + decoded from bytes to unicode. + * A JSON body should be deserialized into a Python dict or list. + +When the request contains a Content-Type header, the media type is used as a +key to look up a value in the +:attr:`request.body.processors` dict. +If the full media +type is not found, then the major type is tried; for example, if no processor +is found for the 'image/jpeg' type, then we look for a processor for the 'image' +types altogether. If neither the full type nor the major type has a matching +processor, then a default processor is used +(:func:`default_proc`). For most +types, this means no processing is done, and the body is left unread as a +raw byte stream. Processors are configurable in an 'on_start_resource' hook. + +Some processors, especially those for the 'text' types, attempt to decode bytes +to unicode. If the Content-Type request header includes a 'charset' parameter, +this is used to decode the entity. Otherwise, one or more default charsets may +be attempted, although this decision is up to each processor. If a processor +successfully decodes an Entity or Part, it should set the +:attr:`charset` attribute +on the Entity or Part to the name of the successful charset, so that +applications can easily re-encode or transcode the value if they wish. + +If the Content-Type of the request entity is of major type 'multipart', then +the above parsing process, and possibly a decoding process, is performed for +each part. + +For both the full entity and multipart parts, a Content-Disposition header may +be used to fill :attr:`name` and +:attr:`filename` attributes on the +request.body or the Part. + +.. _custombodyprocessors: + +Custom Processors +================= + +You can add your own processors for any specific or major MIME type. Simply add +it to the :attr:`processors` dict in a +hook/tool that runs at ``on_start_resource`` or ``before_request_body``. +Here's the built-in JSON tool for an example:: + + def json_in(force=True, debug=False): + request = cherrypy.serving.request + def json_processor(entity): + \"""Read application/json data into request.json.\""" + if not entity.headers.get("Content-Length", ""): + raise cherrypy.HTTPError(411) + + body = entity.fp.read() + try: + request.json = json_decode(body) + except ValueError: + raise cherrypy.HTTPError(400, 'Invalid JSON document') + if force: + request.body.processors.clear() + request.body.default_proc = cherrypy.HTTPError( + 415, 'Expected an application/json content type') + request.body.processors['application/json'] = json_processor + +We begin by defining a new ``json_processor`` function to stick in the ``processors`` +dictionary. All processor functions take a single argument, the ``Entity`` instance +they are to process. It will be called whenever a request is received (for those +URI's where the tool is turned on) which has a ``Content-Type`` of +"application/json". + +First, it checks for a valid ``Content-Length`` (raising 411 if not valid), then +reads the remaining bytes on the socket. The ``fp`` object knows its own length, so +it won't hang waiting for data that never arrives. It will return when all data +has been read. Then, we decode those bytes using Python's built-in ``json`` module, +and stick the decoded result onto ``request.json`` . If it cannot be decoded, we +raise 400. + +If the "force" argument is True (the default), the ``Tool`` clears the ``processors`` +dict so that request entities of other ``Content-Types`` aren't parsed at all. Since +there's no entry for those invalid MIME types, the ``default_proc`` method of ``cherrypy.request.body`` +is called. But this does nothing by default (usually to provide the page handler an opportunity to handle it.) +But in our case, we want to raise 415, so we replace ``request.body.default_proc`` +with the error (``HTTPError`` instances, when called, raise themselves). + +If we were defining a custom processor, we can do so without making a ``Tool``. Just add the config entry:: + + request.body.processors = {'application/json': json_processor} + +Note that you can only replace the ``processors`` dict wholesale this way, not update the existing one. +""" + +try: + from io import DEFAULT_BUFFER_SIZE +except ImportError: + DEFAULT_BUFFER_SIZE = 8192 +import re +import sys +import tempfile +try: + from urllib import unquote_plus +except ImportError: + def unquote_plus(bs): + """Bytes version of urllib.parse.unquote_plus.""" + bs = bs.replace(ntob('+'), ntob(' ')) + atoms = bs.split(ntob('%')) + for i in range(1, len(atoms)): + item = atoms[i] + try: + pct = int(item[:2], 16) + atoms[i] = bytes([pct]) + item[2:] + except ValueError: + pass + return ntob('').join(atoms) + +import cherrypy +from cherrypy._cpcompat import basestring, ntob, ntou +from cherrypy.lib import httputil + + +# -------------------------------- Processors -------------------------------- # + +def process_urlencoded(entity): + """Read application/x-www-form-urlencoded data into entity.params.""" + qs = entity.fp.read() + for charset in entity.attempt_charsets: + try: + params = {} + for aparam in qs.split(ntob('&')): + for pair in aparam.split(ntob(';')): + if not pair: + continue + + atoms = pair.split(ntob('='), 1) + if len(atoms) == 1: + atoms.append(ntob('')) + + key = unquote_plus(atoms[0]).decode(charset) + value = unquote_plus(atoms[1]).decode(charset) + + if key in params: + if not isinstance(params[key], list): + params[key] = [params[key]] + params[key].append(value) + else: + params[key] = value + except UnicodeDecodeError: + pass + else: + entity.charset = charset + break + else: + raise cherrypy.HTTPError( + 400, "The request entity could not be decoded. The following " + "charsets were attempted: %s" % repr(entity.attempt_charsets)) + + # Now that all values have been successfully parsed and decoded, + # apply them to the entity.params dict. + for key, value in params.items(): + if key in entity.params: + if not isinstance(entity.params[key], list): + entity.params[key] = [entity.params[key]] + entity.params[key].append(value) + else: + entity.params[key] = value + + +def process_multipart(entity): + """Read all multipart parts into entity.parts.""" + ib = "" + if 'boundary' in entity.content_type.params: + # http://tools.ietf.org/html/rfc2046#section-5.1.1 + # "The grammar for parameters on the Content-type field is such that it + # is often necessary to enclose the boundary parameter values in quotes + # on the Content-type line" + ib = entity.content_type.params['boundary'].strip('"') + + if not re.match("^[ -~]{0,200}[!-~]$", ib): + raise ValueError('Invalid boundary in multipart form: %r' % (ib,)) + + ib = ('--' + ib).encode('ascii') + + # Find the first marker + while True: + b = entity.readline() + if not b: + return + + b = b.strip() + if b == ib: + break + + # Read all parts + while True: + part = entity.part_class.from_fp(entity.fp, ib) + entity.parts.append(part) + part.process() + if part.fp.done: + break + +def process_multipart_form_data(entity): + """Read all multipart/form-data parts into entity.parts or entity.params.""" + process_multipart(entity) + + kept_parts = [] + for part in entity.parts: + if part.name is None: + kept_parts.append(part) + else: + if part.filename is None: + # It's a regular field + value = part.fullvalue() + else: + # It's a file upload. Retain the whole part so consumer code + # has access to its .file and .filename attributes. + value = part + + if part.name in entity.params: + if not isinstance(entity.params[part.name], list): + entity.params[part.name] = [entity.params[part.name]] + entity.params[part.name].append(value) + else: + entity.params[part.name] = value + + entity.parts = kept_parts + +def _old_process_multipart(entity): + """The behavior of 3.2 and lower. Deprecated and will be changed in 3.3.""" + process_multipart(entity) + + params = entity.params + + for part in entity.parts: + if part.name is None: + key = ntou('parts') + else: + key = part.name + + if part.filename is None: + # It's a regular field + value = part.fullvalue() + else: + # It's a file upload. Retain the whole part so consumer code + # has access to its .file and .filename attributes. + value = part + + if key in params: + if not isinstance(params[key], list): + params[key] = [params[key]] + params[key].append(value) + else: + params[key] = value + + + +# --------------------------------- Entities --------------------------------- # + + +class Entity(object): + """An HTTP request body, or MIME multipart body. + + This class collects information about the HTTP request entity. When a + given entity is of MIME type "multipart", each part is parsed into its own + Entity instance, and the set of parts stored in + :attr:`entity.parts`. + + Between the ``before_request_body`` and ``before_handler`` tools, CherryPy + tries to process the request body (if any) by calling + :func:`request.body.process`, a dict. + If a matching processor cannot be found for the complete Content-Type, + it tries again using the major type. For example, if a request with an + entity of type "image/jpeg" arrives, but no processor can be found for + that complete type, then one is sought for the major type "image". If a + processor is still not found, then the + :func:`default_proc` method of the + Entity is called (which does nothing by default; you can override this too). + + CherryPy includes processors for the "application/x-www-form-urlencoded" + type, the "multipart/form-data" type, and the "multipart" major type. + CherryPy 3.2 processes these types almost exactly as older versions. + Parts are passed as arguments to the page handler using their + ``Content-Disposition.name`` if given, otherwise in a generic "parts" + argument. Each such part is either a string, or the + :class:`Part` itself if it's a file. (In this + case it will have ``file`` and ``filename`` attributes, or possibly a + ``value`` attribute). Each Part is itself a subclass of + Entity, and has its own ``process`` method and ``processors`` dict. + + There is a separate processor for the "multipart" major type which is more + flexible, and simply stores all multipart parts in + :attr:`request.body.parts`. You can + enable it with:: + + cherrypy.request.body.processors['multipart'] = _cpreqbody.process_multipart + + in an ``on_start_resource`` tool. + """ + + # http://tools.ietf.org/html/rfc2046#section-4.1.2: + # "The default character set, which must be assumed in the + # absence of a charset parameter, is US-ASCII." + # However, many browsers send data in utf-8 with no charset. + attempt_charsets = ['utf-8'] + """A list of strings, each of which should be a known encoding. + + When the Content-Type of the request body warrants it, each of the given + encodings will be tried in order. The first one to successfully decode the + entity without raising an error is stored as + :attr:`entity.charset`. This defaults + to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by + `HTTP/1.1 `_), + but ``['us-ascii', 'utf-8']`` for multipart parts. + """ + + charset = None + """The successful decoding; see "attempt_charsets" above.""" + + content_type = None + """The value of the Content-Type request header. + + If the Entity is part of a multipart payload, this will be the Content-Type + given in the MIME headers for this part. + """ + + default_content_type = 'application/x-www-form-urlencoded' + """This defines a default ``Content-Type`` to use if no Content-Type header + is given. The empty string is used for RequestBody, which results in the + request body not being read or parsed at all. This is by design; a missing + ``Content-Type`` header in the HTTP request entity is an error at best, + and a security hole at worst. For multipart parts, however, the MIME spec + declares that a part with no Content-Type defaults to "text/plain" + (see :class:`Part`). + """ + + filename = None + """The ``Content-Disposition.filename`` header, if available.""" + + fp = None + """The readable socket file object.""" + + headers = None + """A dict of request/multipart header names and values. + + This is a copy of the ``request.headers`` for the ``request.body``; + for multipart parts, it is the set of headers for that part. + """ + + length = None + """The value of the ``Content-Length`` header, if provided.""" + + name = None + """The "name" parameter of the ``Content-Disposition`` header, if any.""" + + params = None + """ + If the request Content-Type is 'application/x-www-form-urlencoded' or + multipart, this will be a dict of the params pulled from the entity + body; that is, it will be the portion of request.params that come + from the message body (sometimes called "POST params", although they + can be sent with various HTTP method verbs). This value is set between + the 'before_request_body' and 'before_handler' hooks (assuming that + process_request_body is True).""" + + processors = {'application/x-www-form-urlencoded': process_urlencoded, + 'multipart/form-data': process_multipart_form_data, + 'multipart': process_multipart, + } + """A dict of Content-Type names to processor methods.""" + + parts = None + """A list of Part instances if ``Content-Type`` is of major type "multipart".""" + + part_class = None + """The class used for multipart parts. + + You can replace this with custom subclasses to alter the processing of + multipart parts. + """ + + def __init__(self, fp, headers, params=None, parts=None): + # Make an instance-specific copy of the class processors + # so Tools, etc. can replace them per-request. + self.processors = self.processors.copy() + + self.fp = fp + self.headers = headers + + if params is None: + params = {} + self.params = params + + if parts is None: + parts = [] + self.parts = parts + + # Content-Type + self.content_type = headers.elements('Content-Type') + if self.content_type: + self.content_type = self.content_type[0] + else: + self.content_type = httputil.HeaderElement.from_str( + self.default_content_type) + + # Copy the class 'attempt_charsets', prepending any Content-Type charset + dec = self.content_type.params.get("charset", None) + if dec: + self.attempt_charsets = [dec] + [c for c in self.attempt_charsets + if c != dec] + else: + self.attempt_charsets = self.attempt_charsets[:] + + # Length + self.length = None + clen = headers.get('Content-Length', None) + # If Transfer-Encoding is 'chunked', ignore any Content-Length. + if clen is not None and 'chunked' not in headers.get('Transfer-Encoding', ''): + try: + self.length = int(clen) + except ValueError: + pass + + # Content-Disposition + self.name = None + self.filename = None + disp = headers.elements('Content-Disposition') + if disp: + disp = disp[0] + if 'name' in disp.params: + self.name = disp.params['name'] + if self.name.startswith('"') and self.name.endswith('"'): + self.name = self.name[1:-1] + if 'filename' in disp.params: + self.filename = disp.params['filename'] + if self.filename.startswith('"') and self.filename.endswith('"'): + self.filename = self.filename[1:-1] + + # The 'type' attribute is deprecated in 3.2; remove it in 3.3. + type = property(lambda self: self.content_type, + doc="""A deprecated alias for :attr:`content_type`.""") + + def read(self, size=None, fp_out=None): + return self.fp.read(size, fp_out) + + def readline(self, size=None): + return self.fp.readline(size) + + def readlines(self, sizehint=None): + return self.fp.readlines(sizehint) + + def __iter__(self): + return self + + def __next__(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def next(self): + return self.__next__() + + def read_into_file(self, fp_out=None): + """Read the request body into fp_out (or make_file() if None). Return fp_out.""" + if fp_out is None: + fp_out = self.make_file() + self.read(fp_out=fp_out) + return fp_out + + def make_file(self): + """Return a file-like object into which the request body will be read. + + By default, this will return a TemporaryFile. Override as needed. + See also :attr:`cherrypy._cpreqbody.Part.maxrambytes`.""" + return tempfile.TemporaryFile() + + def fullvalue(self): + """Return this entity as a string, whether stored in a file or not.""" + if self.file: + # It was stored in a tempfile. Read it. + self.file.seek(0) + value = self.file.read() + self.file.seek(0) + else: + value = self.value + return value + + def process(self): + """Execute the best-match processor for the given media type.""" + proc = None + ct = self.content_type.value + try: + proc = self.processors[ct] + except KeyError: + toptype = ct.split('/', 1)[0] + try: + proc = self.processors[toptype] + except KeyError: + pass + if proc is None: + self.default_proc() + else: + proc(self) + + def default_proc(self): + """Called if a more-specific processor is not found for the ``Content-Type``.""" + # Leave the fp alone for someone else to read. This works fine + # for request.body, but the Part subclasses need to override this + # so they can move on to the next part. + pass + + +class Part(Entity): + """A MIME part entity, part of a multipart entity.""" + + # "The default character set, which must be assumed in the absence of a + # charset parameter, is US-ASCII." + attempt_charsets = ['us-ascii', 'utf-8'] + """A list of strings, each of which should be a known encoding. + + When the Content-Type of the request body warrants it, each of the given + encodings will be tried in order. The first one to successfully decode the + entity without raising an error is stored as + :attr:`entity.charset`. This defaults + to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by + `HTTP/1.1 `_), + but ``['us-ascii', 'utf-8']`` for multipart parts. + """ + + boundary = None + """The MIME multipart boundary.""" + + default_content_type = 'text/plain' + """This defines a default ``Content-Type`` to use if no Content-Type header + is given. The empty string is used for RequestBody, which results in the + request body not being read or parsed at all. This is by design; a missing + ``Content-Type`` header in the HTTP request entity is an error at best, + and a security hole at worst. For multipart parts, however (this class), + the MIME spec declares that a part with no Content-Type defaults to + "text/plain". + """ + + # This is the default in stdlib cgi. We may want to increase it. + maxrambytes = 1000 + """The threshold of bytes after which point the ``Part`` will store its data + in a file (generated by :func:`make_file`) + instead of a string. Defaults to 1000, just like the :mod:`cgi` module in + Python's standard library. + """ + + def __init__(self, fp, headers, boundary): + Entity.__init__(self, fp, headers) + self.boundary = boundary + self.file = None + self.value = None + + def from_fp(cls, fp, boundary): + headers = cls.read_headers(fp) + return cls(fp, headers, boundary) + from_fp = classmethod(from_fp) + + def read_headers(cls, fp): + headers = httputil.HeaderMap() + while True: + line = fp.readline() + if not line: + # No more data--illegal end of headers + raise EOFError("Illegal end of headers.") + + if line == ntob('\r\n'): + # Normal end of headers + break + if not line.endswith(ntob('\r\n')): + raise ValueError("MIME requires CRLF terminators: %r" % line) + + if line[0] in ntob(' \t'): + # It's a continuation line. + v = line.strip().decode('ISO-8859-1') + else: + k, v = line.split(ntob(":"), 1) + k = k.strip().decode('ISO-8859-1') + v = v.strip().decode('ISO-8859-1') + + existing = headers.get(k) + if existing: + v = ", ".join((existing, v)) + headers[k] = v + + return headers + read_headers = classmethod(read_headers) + + def read_lines_to_boundary(self, fp_out=None): + """Read bytes from self.fp and return or write them to a file. + + If the 'fp_out' argument is None (the default), all bytes read are + returned in a single byte string. + + If the 'fp_out' argument is not None, it must be a file-like object that + supports the 'write' method; all bytes read will be written to the fp, + and that fp is returned. + """ + endmarker = self.boundary + ntob("--") + delim = ntob("") + prev_lf = True + lines = [] + seen = 0 + while True: + line = self.fp.readline(1<<16) + if not line: + raise EOFError("Illegal end of multipart body.") + if line.startswith(ntob("--")) and prev_lf: + strippedline = line.strip() + if strippedline == self.boundary: + break + if strippedline == endmarker: + self.fp.finish() + break + + line = delim + line + + if line.endswith(ntob("\r\n")): + delim = ntob("\r\n") + line = line[:-2] + prev_lf = True + elif line.endswith(ntob("\n")): + delim = ntob("\n") + line = line[:-1] + prev_lf = True + else: + delim = ntob("") + prev_lf = False + + if fp_out is None: + lines.append(line) + seen += len(line) + if seen > self.maxrambytes: + fp_out = self.make_file() + for line in lines: + fp_out.write(line) + else: + fp_out.write(line) + + if fp_out is None: + result = ntob('').join(lines) + for charset in self.attempt_charsets: + try: + result = result.decode(charset) + except UnicodeDecodeError: + pass + else: + self.charset = charset + return result + else: + raise cherrypy.HTTPError( + 400, "The request entity could not be decoded. The following " + "charsets were attempted: %s" % repr(self.attempt_charsets)) + else: + fp_out.seek(0) + return fp_out + + def default_proc(self): + """Called if a more-specific processor is not found for the ``Content-Type``.""" + if self.filename: + # Always read into a file if a .filename was given. + self.file = self.read_into_file() + else: + result = self.read_lines_to_boundary() + if isinstance(result, basestring): + self.value = result + else: + self.file = result + + def read_into_file(self, fp_out=None): + """Read the request body into fp_out (or make_file() if None). Return fp_out.""" + if fp_out is None: + fp_out = self.make_file() + self.read_lines_to_boundary(fp_out=fp_out) + return fp_out + +Entity.part_class = Part + +try: + inf = float('inf') +except ValueError: + # Python 2.4 and lower + class Infinity(object): + def __cmp__(self, other): + return 1 + def __sub__(self, other): + return self + inf = Infinity() + + +comma_separated_headers = ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', 'Connection', + 'Content-Encoding', 'Content-Language', 'Expect', 'If-Match', + 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'Te', 'Trailer', + 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', 'Www-Authenticate'] + + +class SizedReader: + + def __init__(self, fp, length, maxbytes, bufsize=DEFAULT_BUFFER_SIZE, has_trailers=False): + # Wrap our fp in a buffer so peek() works + self.fp = fp + self.length = length + self.maxbytes = maxbytes + self.buffer = ntob('') + self.bufsize = bufsize + self.bytes_read = 0 + self.done = False + self.has_trailers = has_trailers + + def read(self, size=None, fp_out=None): + """Read bytes from the request body and return or write them to a file. + + A number of bytes less than or equal to the 'size' argument are read + off the socket. The actual number of bytes read are tracked in + self.bytes_read. The number may be smaller than 'size' when 1) the + client sends fewer bytes, 2) the 'Content-Length' request header + specifies fewer bytes than requested, or 3) the number of bytes read + exceeds self.maxbytes (in which case, 413 is raised). + + If the 'fp_out' argument is None (the default), all bytes read are + returned in a single byte string. + + If the 'fp_out' argument is not None, it must be a file-like object that + supports the 'write' method; all bytes read will be written to the fp, + and None is returned. + """ + + if self.length is None: + if size is None: + remaining = inf + else: + remaining = size + else: + remaining = self.length - self.bytes_read + if size and size < remaining: + remaining = size + if remaining == 0: + self.finish() + if fp_out is None: + return ntob('') + else: + return None + + chunks = [] + + # Read bytes from the buffer. + if self.buffer: + if remaining is inf: + data = self.buffer + self.buffer = ntob('') + else: + data = self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + datalen = len(data) + remaining -= datalen + + # Check lengths. + self.bytes_read += datalen + if self.maxbytes and self.bytes_read > self.maxbytes: + raise cherrypy.HTTPError(413) + + # Store the data. + if fp_out is None: + chunks.append(data) + else: + fp_out.write(data) + + # Read bytes from the socket. + while remaining > 0: + chunksize = min(remaining, self.bufsize) + try: + data = self.fp.read(chunksize) + except Exception: + e = sys.exc_info()[1] + if e.__class__.__name__ == 'MaxSizeExceeded': + # Post data is too big + raise cherrypy.HTTPError( + 413, "Maximum request length: %r" % e.args[1]) + else: + raise + if not data: + self.finish() + break + datalen = len(data) + remaining -= datalen + + # Check lengths. + self.bytes_read += datalen + if self.maxbytes and self.bytes_read > self.maxbytes: + raise cherrypy.HTTPError(413) + + # Store the data. + if fp_out is None: + chunks.append(data) + else: + fp_out.write(data) + + if fp_out is None: + return ntob('').join(chunks) + + def readline(self, size=None): + """Read a line from the request body and return it.""" + chunks = [] + while size is None or size > 0: + chunksize = self.bufsize + if size is not None and size < self.bufsize: + chunksize = size + data = self.read(chunksize) + if not data: + break + pos = data.find(ntob('\n')) + 1 + if pos: + chunks.append(data[:pos]) + remainder = data[pos:] + self.buffer += remainder + self.bytes_read -= len(remainder) + break + else: + chunks.append(data) + return ntob('').join(chunks) + + def readlines(self, sizehint=None): + """Read lines from the request body and return them.""" + if self.length is not None: + if sizehint is None: + sizehint = self.length - self.bytes_read + else: + sizehint = min(sizehint, self.length - self.bytes_read) + + lines = [] + seen = 0 + while True: + line = self.readline() + if not line: + break + lines.append(line) + seen += len(line) + if seen >= sizehint: + break + return lines + + def finish(self): + self.done = True + if self.has_trailers and hasattr(self.fp, 'read_trailer_lines'): + self.trailers = {} + + try: + for line in self.fp.read_trailer_lines(): + if line[0] in ntob(' \t'): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(ntob(":"), 1) + except ValueError: + raise ValueError("Illegal header line.") + k = k.strip().title() + v = v.strip() + + if k in comma_separated_headers: + existing = self.trailers.get(envname) + if existing: + v = ntob(", ").join((existing, v)) + self.trailers[k] = v + except Exception: + e = sys.exc_info()[1] + if e.__class__.__name__ == 'MaxSizeExceeded': + # Post data is too big + raise cherrypy.HTTPError( + 413, "Maximum request length: %r" % e.args[1]) + else: + raise + + +class RequestBody(Entity): + """The entity of the HTTP request.""" + + bufsize = 8 * 1024 + """The buffer size used when reading the socket.""" + + # Don't parse the request body at all if the client didn't provide + # a Content-Type header. See http://www.cherrypy.org/ticket/790 + default_content_type = '' + """This defines a default ``Content-Type`` to use if no Content-Type header + is given. The empty string is used for RequestBody, which results in the + request body not being read or parsed at all. This is by design; a missing + ``Content-Type`` header in the HTTP request entity is an error at best, + and a security hole at worst. For multipart parts, however, the MIME spec + declares that a part with no Content-Type defaults to "text/plain" + (see :class:`Part`). + """ + + maxbytes = None + """Raise ``MaxSizeExceeded`` if more bytes than this are read from the socket.""" + + def __init__(self, fp, headers, params=None, request_params=None): + Entity.__init__(self, fp, headers, params) + + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + # When no explicit charset parameter is provided by the + # sender, media subtypes of the "text" type are defined + # to have a default charset value of "ISO-8859-1" when + # received via HTTP. + if self.content_type.value.startswith('text/'): + for c in ('ISO-8859-1', 'iso-8859-1', 'Latin-1', 'latin-1'): + if c in self.attempt_charsets: + break + else: + self.attempt_charsets.append('ISO-8859-1') + + # Temporary fix while deprecating passing .parts as .params. + self.processors['multipart'] = _old_process_multipart + + if request_params is None: + request_params = {} + self.request_params = request_params + + def process(self): + """Process the request entity based on its Content-Type.""" + # "The presence of a message-body in a request is signaled by the + # inclusion of a Content-Length or Transfer-Encoding header field in + # the request's message-headers." + # It is possible to send a POST request with no body, for example; + # however, app developers are responsible in that case to set + # cherrypy.request.process_body to False so this method isn't called. + h = cherrypy.serving.request.headers + if 'Content-Length' not in h and 'Transfer-Encoding' not in h: + raise cherrypy.HTTPError(411) + + self.fp = SizedReader(self.fp, self.length, + self.maxbytes, bufsize=self.bufsize, + has_trailers='Trailer' in h) + super(RequestBody, self).process() + + # Body params should also be a part of the request_params + # add them in here. + request_params = self.request_params + for key, value in self.params.items(): + # Python 2 only: keyword arguments must be byte strings (type 'str'). + if sys.version_info < (3, 0): + if isinstance(key, unicode): + key = key.encode('ISO-8859-1') + + if key in request_params: + if not isinstance(request_params[key], list): + request_params[key] = [request_params[key]] + request_params[key].append(value) + else: + request_params[key] = value diff --git a/src/cherrypy/_cprequest.py b/src/cherrypy/_cprequest.py index 23bdad23b6..5890c7282f 100644 --- a/src/cherrypy/_cprequest.py +++ b/src/cherrypy/_cprequest.py @@ -1,39 +1,38 @@ -import Cookie import os import sys import time -import types +import warnings import cherrypy -from cherrypy import _cpcgifs, _cpconfig +from cherrypy._cpcompat import basestring, copykeys, ntob, unicodestr +from cherrypy._cpcompat import SimpleCookie, CookieError, py3k +from cherrypy import _cpreqbody, _cpconfig from cherrypy._cperror import format_exc, bare_error -from cherrypy.lib import http, file_generator +from cherrypy.lib import httputil, file_generator class Hook(object): """A callback and its metadata: failsafe, priority, and kwargs.""" - __metaclass__ = cherrypy._AttributeDocstrings - callback = None - callback__doc = """ + """ The bare callable that this Hook object is wrapping, which will be called when the Hook is called.""" failsafe = False - failsafe__doc = """ + """ If True, the callback is guaranteed to run even if other callbacks from the same call point raise exceptions.""" priority = 50 - priority__doc = """ + """ Defines the order of execution for a list of Hooks. Priority numbers should be limited to the closed interval [0, 100], but values outside this range are acceptable, as are fractional values.""" kwargs = {} - kwargs__doc = """ + """ A set of keyword arguments that will be passed to the callable on each call.""" @@ -50,7 +49,12 @@ class Hook(object): self.kwargs = kwargs + def __lt__(self, other): + # Python 3 + return self.priority < other.priority + def __cmp__(self, other): + # Python 2 return cmp(self.priority, other.priority) def __call__(self): @@ -63,7 +67,7 @@ class Hook(object): % (cls.__module__, cls.__name__, self.callback, self.failsafe, self.priority, ", ".join(['%s=%r' % (k, v) - for k, v in self.kwargs.iteritems()]))) + for k, v in self.kwargs.items()]))) class HookMap(dict): @@ -105,20 +109,20 @@ class HookMap(dict): exc = sys.exc_info()[1] cherrypy.log(traceback=True, severity=40) if exc: - raise + raise exc def __copy__(self): newmap = self.__class__() # We can't just use 'update' because we want copies of the # mutable values (each is a list) as well. - for k, v in self.iteritems(): + for k, v in self.items(): newmap[k] = v[:] return newmap copy = __copy__ def __repr__(self): cls = self.__class__ - return "%s.%s(points=%r)" % (cls.__module__, cls.__name__, self.keys()) + return "%s.%s(points=%r)" % (cls.__module__, cls.__name__, copykeys(self)) # Config namespace handlers @@ -133,21 +137,30 @@ def hooks_namespace(k, v): v = cherrypy.lib.attributes(v) if not isinstance(v, Hook): v = Hook(v) - cherrypy.request.hooks[hookpoint].append(v) + cherrypy.serving.request.hooks[hookpoint].append(v) def request_namespace(k, v): """Attach request attributes declared in config.""" - setattr(cherrypy.request, k, v) + # Provides config entries to set request.body attrs (like attempt_charsets). + if k[:5] == 'body.': + setattr(cherrypy.serving.request.body, k[5:], v) + else: + setattr(cherrypy.serving.request, k, v) def response_namespace(k, v): """Attach response attributes declared in config.""" - setattr(cherrypy.response, k, v) + # Provides config entries to set default response headers + # http://cherrypy.org/ticket/889 + if k[:8] == 'headers.': + cherrypy.serving.response.headers[k.split('.', 1)[1]] = v + else: + setattr(cherrypy.serving.response, k, v) def error_page_namespace(k, v): """Attach error pages declared in config.""" if k != 'default': k = int(k) - cherrypy.request.error_page[k] = v + cherrypy.serving.request.error_page[k] = v hookpoints = ['on_start_resource', 'before_request_body', @@ -169,44 +182,44 @@ class Request(object): the given URL, and the execution plan for generating a response. """ - __metaclass__ = cherrypy._AttributeDocstrings - prev = None - prev__doc = """ + """ The previous Request object (if any). This should be None unless we are processing an InternalRedirect.""" # Conversation/connection attributes - local = http.Host("127.0.0.1", 80) - local__doc = \ - "An http.Host(ip, port, hostname) object for the server socket." + local = httputil.Host("127.0.0.1", 80) + "An httputil.Host(ip, port, hostname) object for the server socket." - remote = http.Host("127.0.0.1", 1111) - remote__doc = \ - "An http.Host(ip, port, hostname) object for the client socket." + remote = httputil.Host("127.0.0.1", 1111) + "An httputil.Host(ip, port, hostname) object for the client socket." scheme = "http" - scheme__doc = """ + """ The protocol used between client and server. In most cases, this will be either 'http' or 'https'.""" server_protocol = "HTTP/1.1" - server_protocol__doc = """ + """ The HTTP version for which the HTTP server is at least conditionally compliant.""" base = "" - base__doc = """The (scheme://host) portion of the requested URL.""" + """The (scheme://host) portion of the requested URL. + In some cases (e.g. when proxying via mod_rewrite), this may contain + path segments which cherrypy.url uses when constructing url's, but + which otherwise are ignored by CherryPy. Regardless, this value + MUST NOT end in a slash.""" # Request-Line attributes request_line = "" - request_line__doc = """ + """ The complete Request-Line received from the client. This is a single string consisting of the request method, URI, and protocol version (joined by spaces). Any final CRLF is removed.""" method = "GET" - method__doc = """ + """ Indicates the HTTP method to be performed on the resource identified by the Request-URI. Common methods include GET, HEAD, POST, PUT, and DELETE. CherryPy allows any extension method; however, various HTTP @@ -214,23 +227,32 @@ class Request(object): CherryPy applications SHOULD restrict the set (on a per-URI basis).""" query_string = "" - query_string__doc = """ + """ The query component of the Request-URI, a string of information to be interpreted by the resource. The query portion of a URI follows the path component, and is separated by a '?'. For example, the URI 'http://www.cherrypy.org/wiki?a=3&b=4' has the query component, 'a=3&b=4'.""" + query_string_encoding = 'utf8' + """ + The encoding expected for query string arguments after % HEX HEX decoding). + If a query string is provided that cannot be decoded with this encoding, + 404 is raised (since technically it's a different URI). If you want + arbitrary encodings to not error, set this to 'Latin-1'; you can then + encode back to bytes and re-decode to whatever encoding you like later. + """ + protocol = (1, 1) - protocol__doc = """The HTTP protocol version corresponding to the set - of features which should be allowed in the response. If BOTH - the client's request message AND the server's level of HTTP - compliance is HTTP/1.1, this attribute will be the tuple (1, 1). - If either is 1.0, this attribute will be the tuple (1, 0). - Lower HTTP protocol versions are not explicitly supported.""" + """The HTTP protocol version corresponding to the set + of features which should be allowed in the response. If BOTH + the client's request message AND the server's level of HTTP + compliance is HTTP/1.1, this attribute will be the tuple (1, 1). + If either is 1.0, this attribute will be the tuple (1, 0). + Lower HTTP protocol versions are not explicitly supported.""" params = {} - params__doc = """ + """ A dict which combines query string (GET) and request entity (POST) variables. This is populated in two stages: GET params are added before the 'on_start_resource' hook, and POST params are added @@ -238,24 +260,24 @@ class Request(object): # Message attributes header_list = [] - header_list__doc = """ + """ A list of the HTTP request headers as (name, value) tuples. In general, you should use request.headers (a dict) instead.""" - headers = http.HeaderMap() - headers__doc = """ + headers = httputil.HeaderMap() + """ A dict-like object containing the request headers. Keys are header names (in Title-Case format); however, you may get and set them in a case-insensitive manner. That is, headers['Content-Type'] and headers['content-type'] refer to the same value. Values are header - values (decoded according to RFC 2047 if necessary). See also: - http.HeaderMap, http.HeaderElement.""" + values (decoded according to :rfc:`2047` if necessary). See also: + httputil.HeaderMap, httputil.HeaderElement.""" - cookie = Cookie.SimpleCookie() - cookie__doc = """See help(Cookie).""" + cookie = SimpleCookie() + """See help(Cookie).""" rfile = None - rfile__doc = """ + """ If the request included an entity (body), it will be available as a stream in this attribute. However, the rfile will normally be read for you between the 'before_request_body' hook and the @@ -275,36 +297,26 @@ class Request(object): """ process_request_body = True - process_request_body__doc = """ + """ If True, the rfile (if any) is automatically read and parsed, and the result placed into request.params or request.body.""" methods_with_bodies = ("POST", "PUT") - methods_with_bodies__doc = """ + """ A sequence of HTTP methods for which CherryPy will automatically attempt to read a body from the rfile.""" body = None - body__doc = """ + """ If the request Content-Type is 'application/x-www-form-urlencoded' - or multipart, this will be None. Otherwise, this will contain the - request entity body as a string; this value is set between the - 'before_request_body' and 'before_handler' hooks (assuming that - process_request_body is True).""" - - body_params = None - body_params__doc = """ - If the request Content-Type is 'application/x-www-form-urlencoded' or - multipart, this will be a dict of the params pulled from the entity - body; that is, it will be the portion of request.params that come - from the message body (sometimes called "POST params", although they - can be sent with various HTTP method verbs). This value is set between - the 'before_request_body' and 'before_handler' hooks (assuming that - process_request_body is True).""" + or multipart, this will be None. Otherwise, this will be an instance + of :class:`RequestBody` (which you + can .read()); this value is set between the 'before_request_body' and + 'before_handler' hooks (assuming that process_request_body is True).""" # Dispatch attributes dispatch = cherrypy.dispatch.Dispatcher() - dispatch__doc = """ + """ The object which looks up the 'page handler' callable and collects config for the current request based on the path_info, other request attributes, and the application architecture. The core @@ -316,7 +328,7 @@ class Request(object): See help(cherrypy.dispatch) for more information.""" script_name = "" - script_name__doc = """ + """ The 'mount point' of the application which is handling this request. This attribute MUST NOT end in a slash. If the script_name refers to @@ -324,13 +336,13 @@ class Request(object): """ path_info = "/" - path_info__doc = """ + """ The 'relative path' portion of the Request-URI. This is relative to the script_name ('mount point') of the application which is handling this request.""" login = None - login__doc = """ + """ When authentication is used during the request processing this is set to 'False' if it failed and to the 'username' value if it succeeded. The default 'None' implies that no authentication happened.""" @@ -338,11 +350,10 @@ class Request(object): # Note that cherrypy.url uses "if request.app:" to determine whether # the call is during a real HTTP request or not. So leave this None. app = None - app__doc = \ - """The cherrypy.Application object which is handling this request.""" + """The cherrypy.Application object which is handling this request.""" handler = None - handler__doc = """ + """ The function, method, or other callable which CherryPy will call to produce the response. The discovery of the handler and the arguments it will receive are determined by the request.dispatch object. @@ -351,12 +362,12 @@ class Request(object): (from the query string and POST body) as keyword arguments.""" toolmaps = {} - toolmaps__doc = """ + """ A nested dict of all Toolboxes and Tools in effect for this request, of the form: {Toolbox.namespace: {Tool.name: config dict}}.""" config = None - config__doc = """ + """ A flat dict of all configuration entries which apply to the current request. These entries are collected from global config, application config (based on request.path_info), and from handler @@ -366,7 +377,7 @@ class Request(object): and inherits downward).""" is_index = None - is_index__doc = """ + """ This will be True if the current request is mapped to an 'index' resource handler (also, a 'default' handler if path_info ends with a slash). The value may be used to automatically redirect the @@ -374,7 +385,7 @@ class Request(object): the trailing slash. See cherrypy.tools.trailing_slash.""" hooks = HookMap(hookpoints) - hooks__doc = """ + """ A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}. Each key is a str naming the hook point, and each value is a list of hooks which will be called at that hook point during this request. @@ -383,7 +394,7 @@ class Request(object): See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools.""" error_response = cherrypy.HTTPError(500).set_response - error_response__doc = """ + """ The no-arg callable which will handle unexpected, untrapped errors during request processing. This is not used for expected exceptions (like NotFound, HTTPError, or HTTPRedirect) which are raised in @@ -393,7 +404,7 @@ class Request(object): error response to the user-agent.""" error_page = {} - error_page__doc = """ + """ A dict of {error code: response filename or callable} pairs. The error code must be an int representing a given HTTP error code, @@ -406,36 +417,39 @@ class Request(object): and %(version)s. The set of format mappings can be extended by overriding HTTPError.set_response. - If a callable is provided, it will be called by default with keyword + If a callable is provided, it will be called by default with keyword arguments 'status', 'message', 'traceback', and 'version', as for a - string-formatting template. The callable must return a string which - will be set to response.body. It may also override headers or perform - any other processing. + string-formatting template. The callable must return a string or iterable of + strings which will be set to response.body. It may also override headers or + perform any other processing. If no entry is given for an error code, and no 'default' entry exists, a default template will be used. """ show_tracebacks = True - show_tracebacks__doc = """ + """ If True, unexpected errors encountered during request processing will include a traceback in the response body.""" + + show_mismatched_params = True + """ + If True, mismatched parameters encountered during PageHandler invocation + processing will be included in the response body.""" throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect) - throws__doc = \ - """The sequence of exceptions which Request.run does not trap.""" + """The sequence of exceptions which Request.run does not trap.""" throw_errors = False - throw_errors__doc = """ + """ If True, Request.run will not trap any errors (except HTTPRedirect and HTTPError, which are more properly called 'exceptions', not errors).""" closed = False - closed__doc = """ - True once the close method has been called, False otherwise.""" + """True once the close method has been called, False otherwise.""" stage = None - stage__doc = """ + """ A string containing the stage reached in the request-handling process. This is useful when debugging a live server with hung requests.""" @@ -451,8 +465,8 @@ class Request(object): server_protocol="HTTP/1.1"): """Populate a new Request object. - local_host should be an http.Host object with the server info. - remote_host should be an http.Host object with the client info. + local_host should be an httputil.Host object with the server info. + remote_host should be an httputil.Host object with the client info. scheme should be a string, either "http" or "https". """ self.local = local_host @@ -479,23 +493,38 @@ class Request(object): self.stage = 'close' def run(self, method, path, query_string, req_protocol, headers, rfile): - """Process the Request. (Core) + r"""Process the Request. (Core) method, path, query_string, and req_protocol should be pulled directly - from the Request-Line (e.g. "GET /path?key=val HTTP/1.0"). - path should be %XX-unquoted, but query_string should not be. - headers should be a list of (name, value) tuples. - rfile should be a file-like object containing the HTTP request entity. + from the Request-Line (e.g. "GET /path?key=val HTTP/1.0"). + + path + This should be %XX-unquoted, but query_string should not be. + + When using Python 2, they both MUST be byte strings, + not unicode strings. + + When using Python 3, they both MUST be unicode strings, + not byte strings, and preferably not bytes \x00-\xFF + disguised as unicode. + + headers + A list of (name, value) tuples. + + rfile + A file-like object containing the HTTP request entity. When run() is done, the returned object should have 3 attributes: - status, e.g. "200 OK" - header_list, a list of (name, value) tuples - body, an iterable yielding strings + + * status, e.g. "200 OK" + * header_list, a list of (name, value) tuples + * body, an iterable yielding strings Consumer code (HTTP servers) should then access these response attributes to build the outbound stream. """ + response = cherrypy.serving.response self.stage = 'run' try: self.error_response = cherrypy.HTTPError(500).set_response @@ -503,6 +532,7 @@ class Request(object): self.method = method path = path or "/" self.query_string = query_string or '' + self.params = {} # Compare request and server HTTP protocol versions, in case our # server does not support the requested protocol. Limit our output @@ -519,6 +549,7 @@ class Request(object): rp = int(req_protocol[5]), int(req_protocol[7]) sp = int(self.server_protocol[5]), int(self.server_protocol[7]) self.protocol = min(rp, sp) + response.headers.protocol = self.protocol # Rebuild first line of the request (e.g. "GET /path HTTP/1.0"). url = path @@ -527,9 +558,12 @@ class Request(object): self.request_line = '%s %s %s' % (method, url, req_protocol) self.header_list = list(headers) + self.headers = httputil.HeaderMap() + self.rfile = rfile - self.headers = http.HeaderMap() - self.cookie = Cookie.SimpleCookie() + self.body = None + + self.cookie = SimpleCookie() self.handler = None # path_info should be the path from the @@ -554,22 +588,28 @@ class Request(object): else: body = "" r = bare_error(body) - response = cherrypy.response - response.status, response.header_list, response.body = r + response.output_status, response.header_list, response.body = r if self.method == "HEAD": # HEAD requests MUST NOT return a message-body in the response. - cherrypy.response.body = [] + response.body = [] - cherrypy.log.access() + try: + cherrypy.log.access() + except: + cherrypy.log.error(traceback=True) - if cherrypy.response.timed_out: + if response.timed_out: raise cherrypy.TimeoutError() - return cherrypy.response + return response + + # Uncomment for stage debugging + # stage = property(lambda self: self._stage, lambda self, v: print(v)) def respond(self, path_info): """Generate a response for the resource at self.path_info. (Core)""" + response = cherrypy.serving.response try: try: try: @@ -583,36 +623,48 @@ class Request(object): # Make a copy of the class hooks self.hooks = self.__class__.hooks.copy() self.toolmaps = {} + self.stage = 'get_resource' self.get_resource(path_info) + + self.body = _cpreqbody.RequestBody( + self.rfile, self.headers, request_params=self.params) + self.namespaces(self.config) self.stage = 'on_start_resource' self.hooks.run('on_start_resource') + # Parse the querystring + self.stage = 'process_query_string' + self.process_query_string() + + # Process the body if self.process_request_body: if self.method not in self.methods_with_bodies: self.process_request_body = False - self.stage = 'before_request_body' self.hooks.run('before_request_body') if self.process_request_body: - self.process_body() + self.body.process() + # Run the handler self.stage = 'before_handler' self.hooks.run('before_handler') if self.handler: self.stage = 'handler' - cherrypy.response.body = self.handler() + response.body = self.handler() + # Finalize self.stage = 'before_finalize' self.hooks.run('before_finalize') - cherrypy.response.finalize() - except (cherrypy.HTTPRedirect, cherrypy.HTTPError), inst: + response.finalize() + except (cherrypy.HTTPRedirect, cherrypy.HTTPError): + inst = sys.exc_info()[1] inst.set_response() self.stage = 'before_finalize (HTTPError)' self.hooks.run('before_finalize') - cherrypy.response.finalize() + response.finalize() finally: self.stage = 'on_end_resource' self.hooks.run('on_end_resource') @@ -623,10 +675,27 @@ class Request(object): raise self.handle_error() + def process_query_string(self): + """Parse the query string into Python structures. (Core)""" + try: + p = httputil.parse_query_string( + self.query_string, encoding=self.query_string_encoding) + except UnicodeDecodeError: + raise cherrypy.HTTPError( + 404, "The given query string could not be processed. Query " + "strings for this resource must be encoded with %r." % + self.query_string_encoding) + + # Python 2 only: keyword arguments must be byte strings (type 'str'). + if not py3k: + for key, value in p.items(): + if isinstance(key, unicode): + del p[key] + p[key.encode(self.query_string_encoding)] = value + self.params.update(p) + def process_headers(self): """Parse HTTP header data into Python structures. (Core)""" - self.params = http.parse_query_string(self.query_string) - # Process the headers into self.headers headers = self.headers for name, value in self.header_list: @@ -639,7 +708,7 @@ class Request(object): # only Konqueror does that), only the last one will remain in headers # (but they will be correctly stored in request.cookie). if "=?" in value: - dict.__setitem__(headers, name, http.decode_TEXT(value)) + dict.__setitem__(headers, name, httputil.decode_TEXT(value)) else: dict.__setitem__(headers, name, value) @@ -648,7 +717,7 @@ class Request(object): if name == 'Cookie': try: self.cookie.load(value) - except Cookie.CookieError: + except CookieError: msg = "Illegal cookie name %s" % value.split('=')[0] raise cherrypy.HTTPError(400, msg) @@ -666,78 +735,14 @@ class Request(object): def get_resource(self, path): """Call a dispatcher (which sets self.handler and .config). (Core)""" - dispatch = self.dispatch # First, see if there is a custom dispatch at this URI. Custom # dispatchers can only be specified in app.config, not in _cp_config # (since custom dispatchers may not even have an app.root). - trail = path or "/" - while trail: - nodeconf = self.app.config.get(trail, {}) - - d = nodeconf.get("request.dispatch") - if d: - dispatch = d - break - - lastslash = trail.rfind("/") - if lastslash == -1: - break - elif lastslash == 0 and trail != "/": - trail = "/" - else: - trail = trail[:lastslash] + dispatch = self.app.find_config(path, "request.dispatch", self.dispatch) # dispatch() should set self.handler and self.config dispatch(path) - def process_body(self): - """Convert request.rfile into request.params (or request.body). (Core)""" - if not self.headers.get("Content-Length", ""): - # No Content-Length header supplied (or it's 0). - # If we went ahead and called cgi.FieldStorage, it would hang, - # since it cannot determine when to stop reading from the socket. - # See http://www.cherrypy.org/ticket/493. - # See also http://www.cherrypy.org/ticket/650. - # Note also that we expect any HTTP server to have decoded - # any message-body that had a transfer-coding, and we expect - # the HTTP server to have supplied a Content-Length header - # which is valid for the decoded entity-body. - raise cherrypy.HTTPError(411) - - # If the headers are missing "Content-Type" then add one - # with an empty value. This ensures that FieldStorage - # won't parse the request body for params if the client - # didn't provide a "Content-Type" header. - if 'Content-Type' not in self.headers: - h = http.HeaderMap(self.headers.items()) - h['Content-Type'] = '' - else: - h = self.headers - - try: - forms = _cpcgifs.FieldStorage(fp=self.rfile, - headers=h, - # FieldStorage only recognizes POST. - environ={'REQUEST_METHOD': "POST"}, - keep_blank_values=1) - except Exception, e: - if e.__class__.__name__ == 'MaxSizeExceeded': - # Post data is too big - raise cherrypy.HTTPError(413) - else: - raise - - # Note that, if headers['Content-Type'] is multipart/*, - # then forms.file will not exist; instead, each form[key] - # item will be its own file object, and will be handled - # by params_from_CGI_form. - if forms.file: - # request body was a content-type other than form params. - self.body = forms.file - else: - self.body_params = p = http.params_from_CGI_form(forms) - self.params.update(p) - def handle_error(self): """Handle the last unanticipated exception. (Core)""" try: @@ -745,15 +750,42 @@ class Request(object): if self.error_response: self.error_response() self.hooks.run("after_error_response") - cherrypy.response.finalize() - except cherrypy.HTTPRedirect, inst: + cherrypy.serving.response.finalize() + except cherrypy.HTTPRedirect: + inst = sys.exc_info()[1] inst.set_response() - cherrypy.response.finalize() + cherrypy.serving.response.finalize() + + # ------------------------- Properties ------------------------- # + + def _get_body_params(self): + warnings.warn( + "body_params is deprecated in CherryPy 3.2, will be removed in " + "CherryPy 3.3.", + DeprecationWarning + ) + return self.body.params + body_params = property(_get_body_params, + doc= """ + If the request Content-Type is 'application/x-www-form-urlencoded' or + multipart, this will be a dict of the params pulled from the entity + body; that is, it will be the portion of request.params that come + from the message body (sometimes called "POST params", although they + can be sent with various HTTP method verbs). This value is set between + the 'before_request_body' and 'before_handler' hooks (assuming that + process_request_body is True). + + Deprecated in 3.2, will be removed for 3.3 in favor of + :attr:`request.body.params`.""") -class Body(object): +class ResponseBody(object): """The body of the HTTP response (the response entity).""" + if py3k: + unicode_err = ("Page handlers MUST return bytes. Use tools.encode " + "if you wish to return unicode.") + def __get__(self, obj, objclass=None): if obj is None: # When calling on the class instead of an instance... @@ -763,6 +795,9 @@ class Body(object): def __set__(self, obj, value): # Convert the given value to an iterable object. + if py3k and isinstance(value, str): + raise ValueError(self.unicode_err) + if isinstance(value, basestring): # strings get wrapped in a list because iterating over a single # item list is much faster than iterating over every character @@ -772,7 +807,14 @@ class Body(object): else: # [''] doesn't evaluate to False, so replace it with []. value = [] - elif isinstance(value, types.FileType): + elif py3k and isinstance(value, list): + # every item in a list must be bytes... + for i, item in enumerate(value): + if isinstance(item, str): + raise ValueError(self.unicode_err) + # Don't use isinstance here; io.IOBase which has an ABC takes + # 1000 times as long as, say, isinstance(value, str) + elif hasattr(value, 'read'): value = file_generator(value) elif value is None: value = [] @@ -780,53 +822,48 @@ class Body(object): class Response(object): - """An HTTP Response, including status, headers, and body. + """An HTTP Response, including status, headers, and body.""" - Application developers should use Response.headers (a dict) to - set or modify HTTP response headers. When the response is finalized, - Response.headers is transformed into Response.header_list as - (key, value) tuples. - """ - - __metaclass__ = cherrypy._AttributeDocstrings - - # Class attributes for dev-time introspection. status = "" - status__doc = """The HTTP Status-Code and Reason-Phrase.""" + """The HTTP Status-Code and Reason-Phrase.""" header_list = [] - header_list__doc = """ + """ A list of the HTTP response headers as (name, value) tuples. - In general, you should use response.headers (a dict) instead.""" + In general, you should use response.headers (a dict) instead. This + attribute is generated from response.headers and is not valid until + after the finalize phase.""" - headers = http.HeaderMap() - headers__doc = """ + headers = httputil.HeaderMap() + """ A dict-like object containing the response headers. Keys are header names (in Title-Case format); however, you may get and set them in a case-insensitive manner. That is, headers['Content-Type'] and headers['content-type'] refer to the same value. Values are header - values (decoded according to RFC 2047 if necessary). See also: - http.HeaderMap, http.HeaderElement.""" + values (decoded according to :rfc:`2047` if necessary). - cookie = Cookie.SimpleCookie() - cookie__doc = """See help(Cookie).""" + .. seealso:: classes :class:`HeaderMap`, :class:`HeaderElement` + """ - body = Body() - body__doc = """The body (entity) of the HTTP response.""" + cookie = SimpleCookie() + """See help(Cookie).""" + + body = ResponseBody() + """The body (entity) of the HTTP response.""" time = None - time__doc = """The value of time.time() when created. Use in HTTP dates.""" + """The value of time.time() when created. Use in HTTP dates.""" timeout = 300 - timeout__doc = """Seconds after which the response will be aborted.""" + """Seconds after which the response will be aborted.""" timed_out = False - timed_out__doc = """ + """ Flag to indicate the response should be aborted, because it has exceeded its timeout.""" stream = False - stream__doc = """If False, buffer the response body.""" + """If False, buffer the response body.""" def __init__(self): self.status = None @@ -834,33 +871,47 @@ class Response(object): self._body = [] self.time = time.time() - self.headers = http.HeaderMap() + self.headers = httputil.HeaderMap() # Since we know all our keys are titled strings, we can # bypass HeaderMap.update and get a big speed boost. dict.update(self.headers, { "Content-Type": 'text/html', "Server": "CherryPy/" + cherrypy.__version__, - "Date": http.HTTPDate(self.time), + "Date": httputil.HTTPDate(self.time), }) - self.cookie = Cookie.SimpleCookie() + self.cookie = SimpleCookie() def collapse_body(self): """Collapse self.body to a single string; replace it and return it.""" - newbody = ''.join([chunk for chunk in self.body]) + if isinstance(self.body, basestring): + return self.body + + newbody = [] + for chunk in self.body: + if py3k and not isinstance(chunk, bytes): + raise TypeError("Chunk %s is not of type 'bytes'." % repr(chunk)) + newbody.append(chunk) + newbody = ntob('').join(newbody) + self.body = newbody return newbody def finalize(self): """Transform headers (and cookies) into self.header_list. (Core)""" try: - code, reason, _ = http.valid_status(self.status) - except ValueError, x: - raise cherrypy.HTTPError(500, x.args[0]) - - self.status = "%s %s" % (code, reason) + code, reason, _ = httputil.valid_status(self.status) + except ValueError: + raise cherrypy.HTTPError(500, sys.exc_info()[1].args[0]) headers = self.headers + + self.status = "%s %s" % (code, reason) + self.output_status = ntob(str(code), 'ascii') + ntob(" ") + headers.encode(reason) + if self.stream: + # The upshot: wsgiserver will chunk the response if + # you pop Content-Length (or set it explicitly to None). + # Note that lib.static sets C-L to the file's st_size. if dict.get(headers, 'Content-Length') is None: dict.pop(headers, 'Content-Length', None) elif code < 200 or code in (204, 205, 304): @@ -868,7 +919,7 @@ class Response(object): # and 304 (not modified) responses MUST NOT # include a message-body." dict.pop(headers, 'Content-Length', None) - self.body = "" + self.body = ntob("") else: # Responses which are not streamed should have a Content-Length, # but allow user code to set Content-Length if desired. @@ -877,7 +928,7 @@ class Response(object): dict.__setitem__(headers, 'Content-Length', len(content)) # Transform our header dict into a list of tuples. - self.header_list = h = headers.output(cherrypy.request.protocol) + self.header_list = h = headers.output() cookie = self.cookie.output() if cookie: @@ -886,6 +937,10 @@ class Response(object): # Python 2.4 emits cookies joined by LF but 2.5+ by CRLF. line = line[:-1] name, value = line.split(": ", 1) + if isinstance(name, unicodestr): + name = name.encode("ISO-8859-1") + if isinstance(value, unicodestr): + value = headers.encode(value) h.append((name, value)) def check_timeout(self): diff --git a/src/cherrypy/_cpserver.py b/src/cherrypy/_cpserver.py index 0888295bec..2eecd6ec04 100644 --- a/src/cherrypy/_cpserver.py +++ b/src/cherrypy/_cpserver.py @@ -4,6 +4,7 @@ import warnings import cherrypy from cherrypy.lib import attributes +from cherrypy._cpcompat import basestring, py3k # We import * because we want to export check_port # et al as attributes of this module. @@ -15,20 +16,21 @@ class Server(ServerAdapter): You can set attributes (like socket_host and socket_port) on *this* object (which is probably cherrypy.server), and call - quickstart. For example: + quickstart. For example:: cherrypy.server.socket_port = 80 cherrypy.quickstart() """ socket_port = 8080 + """The TCP port on which to listen for connections.""" _socket_host = '127.0.0.1' def _get_socket_host(self): return self._socket_host def _set_socket_host(self, value): - if not value: - raise ValueError("Host values of '' or None are not allowed. " + if value == '': + raise ValueError("The empty string ('') is not an allowed value. " "Use '0.0.0.0' instead to listen on all active " "interfaces (INADDR_ANY).") self._socket_host = value @@ -42,29 +44,93 @@ class Server(ServerAdapter): is the similar IN6ADDR_ANY for IPv6. The empty string or None are not allowed.""") - socket_file = '' + socket_file = None + """If given, the name of the UNIX socket to use instead of TCP/IP. + + When this option is not None, the `socket_host` and `socket_port` options + are ignored.""" + socket_queue_size = 5 + """The 'backlog' argument to socket.listen(); specifies the maximum number + of queued connections (default 5).""" + socket_timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + shutdown_timeout = 5 + """The time to wait for HTTP worker threads to clean up.""" + protocol_version = 'HTTP/1.1' - reverse_dns = False + """The version string to write in the Status-Line of all HTTP responses, + for example, "HTTP/1.1" (the default). Depending on the HTTP server used, + this should also limit the supported features used in the response.""" + thread_pool = 10 + """The number of worker threads to start up in the pool.""" + thread_pool_max = -1 + """The maximum size of the worker-thread pool. Use -1 to indicate no limit.""" + max_request_header_size = 500 * 1024 + """The maximum number of bytes allowable in the request headers. If exceeded, + the HTTP server should return "413 Request Entity Too Large".""" + max_request_body_size = 100 * 1024 * 1024 + """The maximum number of bytes allowable in the request body. If exceeded, + the HTTP server should return "413 Request Entity Too Large".""" + instance = None + """If not None, this should be an HTTP server instance (such as + CPWSGIServer) which cherrypy.server will control. Use this when you need + more control over object instantiation than is available in the various + configuration options.""" + + ssl_context = None + """When using PyOpenSSL, an instance of SSL.Context.""" + ssl_certificate = None + """The filename of the SSL certificate to use.""" + + ssl_certificate_chain = None + """When using PyOpenSSL, the certificate chain to pass to + Context.load_verify_locations.""" + ssl_private_key = None + """The filename of the private key to use with SSL.""" + + if py3k: + ssl_module = 'builtin' + """The name of a registered SSL adaptation module to use with the builtin + WSGI server. Builtin options are: 'builtin' (to use the SSL library built + into recent versions of Python). You may also register your + own classes in the wsgiserver.ssl_adapters dict.""" + else: + ssl_module = 'pyopenssl' + """The name of a registered SSL adaptation module to use with the builtin + WSGI server. Builtin options are 'builtin' (to use the SSL library built + into recent versions of Python) and 'pyopenssl' (to use the PyOpenSSL + project, which you must install separately). You may also register your + own classes in the wsgiserver.ssl_adapters dict.""" + + statistics = False + """Turns statistics-gathering on or off for aware HTTP servers.""" + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + wsgi_version = (1, 0) + """The WSGI version tuple to use with the builtin WSGI server. + The provided options are (1, 0) [which includes support for PEP 3333, + which declares it covers WSGI version 1.0.1 but still mandates the + wsgi.version (1, 0)] and ('u', 0), an experimental unicode version. + You may create and register your own experimental versions of the WSGI + protocol by adding custom classes to the wsgiserver.wsgi_gateways dict.""" def __init__(self): - ServerAdapter.__init__(self, cherrypy.engine) - - def quickstart(self, server=None): - """This does nothing now and will be removed in 3.2.""" - warnings.warn('quickstart does nothing now and will be removed in ' - '3.2. Call cherrypy.engine.start() instead.', - DeprecationWarning) + self.bus = cherrypy.engine + self.httpserver = None + self.interrupt = None + self.running = False def httpserver_from_self(self, httpserver=None): """Return a (httpserver, bind_addr) pair based on self attributes.""" @@ -72,16 +138,11 @@ class Server(ServerAdapter): httpserver = self.instance if httpserver is None: from cherrypy import _cpwsgi_server - httpserver = _cpwsgi_server.CPWSGIServer() + httpserver = _cpwsgi_server.CPWSGIServer(self) if isinstance(httpserver, basestring): - httpserver = attributes(httpserver)() - - if self.socket_file: - return httpserver, self.socket_file - - host = self.socket_host - port = self.socket_port - return httpserver, (host, port) + # Is anyone using this? Can I add an arg? + httpserver = attributes(httpserver)(self) + return httpserver, self.bind_addr def start(self): """Start the HTTP server.""" @@ -90,8 +151,34 @@ class Server(ServerAdapter): ServerAdapter.start(self) start.priority = 75 + def _get_bind_addr(self): + if self.socket_file: + return self.socket_file + if self.socket_host is None and self.socket_port is None: + return None + return (self.socket_host, self.socket_port) + def _set_bind_addr(self, value): + if value is None: + self.socket_file = None + self.socket_host = None + self.socket_port = None + elif isinstance(value, basestring): + self.socket_file = value + self.socket_host = None + self.socket_port = None + else: + try: + self.socket_host, self.socket_port = value + self.socket_file = None + except ValueError: + raise ValueError("bind_addr must be a (host, port) tuple " + "(for TCP sockets) or a string (for Unix " + "domain sockets), not %r" % value) + bind_addr = property(_get_bind_addr, _set_bind_addr, + doc='A (host, port) tuple for TCP sockets or a str for Unix domain sockets.') + def base(self): - """Return the base (scheme://host) for this server.""" + """Return the base (scheme://host[:port] or sock file) for this server.""" if self.socket_file: return self.socket_file diff --git a/src/cherrypy/_cptools.py b/src/cherrypy/_cptools.py index 80ff583d22..22316b31b5 100644 --- a/src/cherrypy/_cptools.py +++ b/src/cherrypy/_cptools.py @@ -3,18 +3,18 @@ Tools are usually designed to be used in a variety of ways (although some may only offer one if they choose): - Library calls: + Library calls All tools are callables that can be used wherever needed. The arguments are straightforward and should be detailed within the docstring. - Function decorators: + Function decorators All tools, when called, may be used as decorators which configure individual CherryPy page handlers (methods on the CherryPy tree). That is, "@tools.anytool()" should "turn on" the tool via the decorated function's _cp_config attribute. - CherryPy config: + CherryPy config If a tool exposes a "_setup" callable, it will be called once per Request (if the feature is "turned on" via config). @@ -22,6 +22,9 @@ Tools may be implemented as any object with a namespace. The builtins are generally either modules or instances of the tools.Tool class. """ +import sys +import warnings + import cherrypy @@ -29,12 +32,20 @@ def _getargs(func): """Return the names of all static arguments to the given function.""" # Use this instead of importing inspect for less mem overhead. import types - if isinstance(func, types.MethodType): - func = func.im_func - co = func.func_code + if sys.version_info >= (3, 0): + if isinstance(func, types.MethodType): + func = func.__func__ + co = func.__code__ + else: + if isinstance(func, types.MethodType): + func = func.im_func + co = func.func_code return co.co_varnames[:co.co_argcount] +_attr_error = ("CherryPy Tools cannot be turned on directly. Instead, turn them " + "on via config, or use them as decorators on your page handlers.") + class Tool(object): """A registered function for use with CherryPy request-processing hooks. @@ -51,6 +62,12 @@ class Tool(object): self.__doc__ = self.callable.__doc__ self._setargs() + def _get_on(self): + raise AttributeError(_attr_error) + def _set_on(self, value): + raise AttributeError(_attr_error) + on = property(_get_on, _set_on) + def _setargs(self): """Copy func parameter names to obj attributes.""" try: @@ -77,7 +94,7 @@ class Tool(object): else: conf = {} - tm = cherrypy.request.toolmaps[self.namespace] + tm = cherrypy.serving.request.toolmaps[self.namespace] if self._name in tm: conf.update(tm[self._name]) @@ -89,7 +106,7 @@ class Tool(object): def __call__(self, *args, **kwargs): """Compile-time decorator (turn on the tool in config). - For example: + For example:: @tools.proxy() def whats_my_base(self): @@ -105,7 +122,7 @@ class Tool(object): f._cp_config = {} subspace = self.namespace + "." + self._name + "." f._cp_config[subspace + "on"] = True - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): f._cp_config[subspace + k] = v return f return tool_decorator @@ -120,8 +137,8 @@ class Tool(object): p = conf.pop("priority", None) if p is None: p = getattr(self.callable, "priority", self._priority) - cherrypy.request.hooks.attach(self._point, self.callable, - priority=p, **conf) + cherrypy.serving.request.hooks.attach(self._point, self.callable, + priority=p, **conf) class HandlerTool(Tool): @@ -141,7 +158,8 @@ class HandlerTool(Tool): def handler(self, *args, **kwargs): """Use this tool as a CherryPy page handler. - For example: + For example:: + class Root: nav = tools.staticdir.handler(section="/nav", dir="nav", root=absDir) @@ -150,13 +168,13 @@ class HandlerTool(Tool): handled = self.callable(*args, **self._merged_args(kwargs)) if not handled: raise cherrypy.NotFound() - return cherrypy.response.body + return cherrypy.serving.response.body handle_func.exposed = True return handle_func def _wrapper(self, **kwargs): if self.callable(**kwargs): - cherrypy.request.handler = None + cherrypy.serving.request.handler = None def _setup(self): """Hook this tool into cherrypy.request. @@ -168,19 +186,20 @@ class HandlerTool(Tool): p = conf.pop("priority", None) if p is None: p = getattr(self.callable, "priority", self._priority) - cherrypy.request.hooks.attach(self._point, self._wrapper, - priority=p, **conf) + cherrypy.serving.request.hooks.attach(self._point, self._wrapper, + priority=p, **conf) class HandlerWrapperTool(Tool): """Tool which wraps request.handler in a provided wrapper function. The 'newhandler' arg must be a handler wrapper function that takes a - 'next_handler' argument, plus *args and **kwargs. Like all page handler + 'next_handler' argument, plus ``*args`` and ``**kwargs``. Like all + page handler functions, it must return an iterable for use as cherrypy.response.body. For example, to allow your 'inner' page handlers to return dicts - which then get interpolated into a template: + which then get interpolated into a template:: def interpolator(next_handler, *args, **kwargs): filename = cherrypy.request.config.get('template') @@ -196,11 +215,11 @@ class HandlerWrapperTool(Tool): self._name = name self._priority = priority - def callable(self): - innerfunc = cherrypy.request.handler + def callable(self, debug=False): + innerfunc = cherrypy.serving.request.handler def wrap(*args, **kwargs): return self.newhandler(innerfunc, *args, **kwargs) - cherrypy.request.handler = wrap + cherrypy.serving.request.handler = wrap class ErrorTool(Tool): @@ -218,29 +237,32 @@ class ErrorTool(Tool): The standard CherryPy request object will automatically call this method when the tool is "turned on" in config. """ - cherrypy.request.error_response = self._wrapper + cherrypy.serving.request.error_response = self._wrapper # Builtin tools # -from cherrypy.lib import cptools, encoding, auth, static, tidy -from cherrypy.lib import sessions as _sessions, xmlrpc as _xmlrpc -from cherrypy.lib import caching as _caching, wsgiapp as _wsgiapp +from cherrypy.lib import cptools, encoding, auth, static, jsontools +from cherrypy.lib import sessions as _sessions, xmlrpcutil as _xmlrpc +from cherrypy.lib import caching as _caching +from cherrypy.lib import auth_basic, auth_digest class SessionTool(Tool): """Session Tool for CherryPy. - sessions.locking: + sessions.locking When 'implicit' (the default), the session will be locked for you, - just before running the page handler. + just before running the page handler. + When 'early', the session will be locked before reading the request - body. This is off by default for safety reasons; for example, - a large upload would block the session, denying an AJAX - progress meter (see http://www.cherrypy.org/ticket/630). + body. This is off by default for safety reasons; for example, + a large upload would block the session, denying an AJAX + progress meter (see http://www.cherrypy.org/ticket/630). + When 'explicit' (or any other value), you need to call - cherrypy.session.acquire_lock() yourself before using - session data. + cherrypy.session.acquire_lock() yourself before using + session data. """ def __init__(self): @@ -256,7 +278,7 @@ class SessionTool(Tool): The standard CherryPy request object will automatically call this method when the tool is "turned on" in config. """ - hooks = cherrypy.request.hooks + hooks = cherrypy.serving.request.hooks conf = self._merged_args() @@ -286,7 +308,7 @@ class SessionTool(Tool): sess.regenerate() # Grab cookie-relevant tool args - conf = dict([(k, v) for k, v in self._merged_args().iteritems() + conf = dict([(k, v) for k, v in self._merged_args().items() if k in ('path', 'path_header', 'name', 'timeout', 'domain', 'secure')]) _sessions.set_response_cookie(**conf) @@ -300,8 +322,8 @@ class XMLRPCController(object): To use it, have your controllers subclass this base class (it will turn on the tool for you). - You can also supply the following optional config entries: - + You can also supply the following optional config entries:: + tools.xmlrpc.encoding: 'utf-8' tools.xmlrpc.allow_none: 0 @@ -321,8 +343,8 @@ class XMLRPCController(object): The XMLRPCDispatcher strips any /RPC2 prefix; if you aren't using /RPC2 in your URL's, you can safely skip turning on the XMLRPCDispatcher. - Otherwise, you need to use declare it in config: - + Otherwise, you need to use declare it in config:: + request.dispatch: cherrypy.dispatch.XMLRPCDispatcher() """ @@ -345,41 +367,17 @@ class XMLRPCController(object): # http://www.cherrypy.org/ticket/533 # if a method is not found, an xmlrpclib.Fault should be returned # raising an exception here will do that; see - # cherrypy.lib.xmlrpc.on_error - raise Exception, 'method "%s" is not supported' % attr + # cherrypy.lib.xmlrpcutil.on_error + raise Exception('method "%s" is not supported' % attr) - conf = cherrypy.request.toolmaps['tools'].get("xmlrpc", {}) + conf = cherrypy.serving.request.toolmaps['tools'].get("xmlrpc", {}) _xmlrpc.respond(body, conf.get('encoding', 'utf-8'), conf.get('allow_none', 0)) - return cherrypy.response.body + return cherrypy.serving.response.body default.exposed = True -class WSGIAppTool(HandlerTool): - """A tool for running any WSGI middleware/application within CP. - - Here are the parameters: - - wsgi_app - any wsgi application callable - env_update - a dictionary with arbitrary keys and values to be - merged with the WSGI environ dictionary. - - Example: - - class Whatever: - _cp_config = {'tools.wsgiapp.on': True, - 'tools.wsgiapp.app': some_app, - 'tools.wsgiapp.env': app_environ, - } - """ - - def _setup(self): - # Keep request body intact so the wsgi app can have its way with it. - cherrypy.request.process_request_body = False - HandlerTool._setup(self) - - class SessionAuthTool(HandlerTool): def _setargs(self): @@ -391,18 +389,9 @@ class SessionAuthTool(HandlerTool): class CachingTool(Tool): """Caching Tool for CherryPy.""" - def _wrapper(self, invalid_methods=("POST", "PUT", "DELETE"), **kwargs): - request = cherrypy.request - - if not hasattr(cherrypy, "_cache"): - # Make a process-wide Cache object. - cherrypy._cache = kwargs.pop("cache_class", _caching.MemoryCache)() - - # Take all remaining kwargs and set them on the Cache object. - for k, v in kwargs.iteritems(): - setattr(cherrypy._cache, k, v) - - if _caching.get(invalid_methods=invalid_methods): + def _wrapper(self, **kwargs): + request = cherrypy.serving.request + if _caching.get(**kwargs): request.handler = None else: if request.cacheable: @@ -416,8 +405,8 @@ class CachingTool(Tool): conf = self._merged_args() p = conf.pop("priority", None) - cherrypy.request.hooks.attach('before_handler', self._wrapper, - priority=p, **conf) + cherrypy.serving.request.hooks.attach('before_handler', self._wrapper, + priority=p, **conf) @@ -441,7 +430,7 @@ class Toolbox(object): def __enter__(self): """Populate request.toolmaps from tools specified in config.""" - cherrypy.request.toolmaps[self.namespace] = map = {} + cherrypy.serving.request.toolmaps[self.namespace] = map = {} def populate(k, v): toolname, arg = k.split(".", 1) bucket = map.setdefault(toolname, {}) @@ -450,7 +439,7 @@ class Toolbox(object): def __exit__(self, exc_type, exc_val, exc_tb): """Run tool._setup() for each tool in our toolmap.""" - map = cherrypy.request.toolmaps.get(self.namespace) + map = cherrypy.serving.request.toolmaps.get(self.namespace) if map: for name, settings in map.items(): if settings.get("on", False): @@ -458,8 +447,29 @@ class Toolbox(object): tool._setup() +class DeprecatedTool(Tool): + + _name = None + warnmsg = "This Tool is deprecated." + + def __init__(self, point, warnmsg=None): + self.point = point + if warnmsg is not None: + self.warnmsg = warnmsg + + def __call__(self, *args, **kwargs): + warnings.warn(self.warnmsg) + def tool_decorator(f): + return f + return tool_decorator + + def _setup(self): + warnings.warn(self.warnmsg) + + default_toolbox = _d = Toolbox("tools") _d.session_auth = SessionAuthTool(cptools.session_auth) +_d.allow = Tool('on_start_resource', cptools.allow) _d.proxy = Tool('before_request_body', cptools.proxy, priority=30) _d.response_headers = Tool('on_start_resource', cptools.response_headers) _d.log_tracebacks = Tool('before_error_response', cptools.log_traceback) @@ -467,19 +477,22 @@ _d.log_headers = Tool('before_error_response', cptools.log_request_headers) _d.log_hooks = Tool('on_end_request', cptools.log_hooks, priority=100) _d.err_redirect = ErrorTool(cptools.redirect) _d.etags = Tool('before_finalize', cptools.validate_etags, priority=75) -_d.decode = Tool('before_handler', encoding.decode) +_d.decode = Tool('before_request_body', encoding.decode) # the order of encoding, gzip, caching is important -_d.encode = Tool('before_finalize', encoding.encode, priority=70) +_d.encode = Tool('before_handler', encoding.ResponseEncoder, priority=70) _d.gzip = Tool('before_finalize', encoding.gzip, priority=80) _d.staticdir = HandlerTool(static.staticdir) _d.staticfile = HandlerTool(static.staticfile) _d.sessions = SessionTool() _d.xmlrpc = ErrorTool(_xmlrpc.on_error) -_d.wsgiapp = WSGIAppTool(_wsgiapp.run) _d.caching = CachingTool('before_handler', _caching.get, 'caching') _d.expires = Tool('before_finalize', _caching.expires) -_d.tidy = Tool('before_finalize', tidy.tidy) -_d.nsgmls = Tool('before_finalize', tidy.nsgmls) +_d.tidy = DeprecatedTool('before_finalize', + "The tidy tool has been removed from the standard distribution of CherryPy. " + "The most recent version can be found at http://tools.cherrypy.org/browser.") +_d.nsgmls = DeprecatedTool('before_finalize', + "The nsgmls tool has been removed from the standard distribution of CherryPy. " + "The most recent version can be found at http://tools.cherrypy.org/browser.") _d.ignore_headers = Tool('before_request_body', cptools.ignore_headers) _d.referer = Tool('before_request_body', cptools.referer) _d.basic_auth = Tool('on_start_resource', auth.basic_auth) @@ -488,5 +501,10 @@ _d.trailing_slash = Tool('before_handler', cptools.trailing_slash, priority=60) _d.flatten = Tool('before_finalize', cptools.flatten) _d.accept = Tool('on_start_resource', cptools.accept) _d.redirect = Tool('on_start_resource', cptools.redirect) +_d.autovary = Tool('on_start_resource', cptools.autovary, priority=0) +_d.json_in = Tool('before_request_body', jsontools.json_in, priority=30) +_d.json_out = Tool('before_handler', jsontools.json_out, priority=30) +_d.auth_basic = Tool('before_handler', auth_basic.basic_auth, priority=1) +_d.auth_digest = Tool('before_handler', auth_digest.digest_auth, priority=1) -del _d, cptools, encoding, auth, static, tidy +del _d, cptools, encoding, auth, static diff --git a/src/cherrypy/_cptree.py b/src/cherrypy/_cptree.py index 36d00865a2..3aa4b9e10a 100644 --- a/src/cherrypy/_cptree.py +++ b/src/cherrypy/_cptree.py @@ -1,9 +1,12 @@ """CherryPy Application and Tree objects.""" import os +import sys + import cherrypy +from cherrypy._cpcompat import ntou, py3k from cherrypy import _cpconfig, _cplogging, _cprequest, _cpwsgi, tools -from cherrypy.lib import http as _http +from cherrypy.lib import httputil class Application(object): @@ -16,29 +19,25 @@ class Application(object): (WSGI application object) for itself. """ - __metaclass__ = cherrypy._AttributeDocstrings - root = None - root__doc = """ - The top-most container of page handlers for this app. Handlers should + """The top-most container of page handlers for this app. Handlers should be arranged in a hierarchy of attributes, matching the expected URI hierarchy; the default dispatcher then searches this hierarchy for a matching handler. When using a dispatcher other than the default, this value may be None.""" config = {} - config__doc = """ - A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict + """A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict of {key: value} pairs.""" namespaces = _cpconfig.NamespaceSet() toolboxes = {'tools': cherrypy.tools} log = None - log__doc = """A LogManager instance. See _cplogging.""" + """A LogManager instance. See _cplogging.""" wsgiapp = None - wsgiapp__doc = """A CPWSGIApp instance. See _cpwsgi.""" + """A CPWSGIApp instance. See _cpwsgi.""" request_class = _cprequest.Request response_class = _cprequest.Response @@ -63,8 +62,7 @@ class Application(object): return "%s.%s(%r, %r)" % (self.__module__, self.__class__.__name__, self.root, self.script_name) - script_name__doc = """ - The URI "mount point" for this app. A mount point is that portion of + script_name_doc = """The URI "mount point" for this app. A mount point is that portion of the URI which is constant for all URIs that are serviced by this application; it does not include scheme, host, or proxy ("virtual host") portions of the URI. @@ -82,14 +80,14 @@ class Application(object): def _get_script_name(self): if self._script_name is None: # None signals that the script name should be pulled from WSGI environ. - return cherrypy.request.wsgi_environ['SCRIPT_NAME'].rstrip("/") + return cherrypy.serving.request.wsgi_environ['SCRIPT_NAME'].rstrip("/") return self._script_name def _set_script_name(self, value): if value: value = value.rstrip("/") self._script_name = value script_name = property(fget=_get_script_name, fset=_set_script_name, - doc=script_name__doc) + doc=script_name_doc) def merge(self, config): """Merge the given config into self.config.""" @@ -98,18 +96,37 @@ class Application(object): # Handle namespaces specified in config. self.namespaces(self.config.get("/", {})) + def find_config(self, path, key, default=None): + """Return the most-specific value for key along path, or default.""" + trail = path or "/" + while trail: + nodeconf = self.config.get(trail, {}) + + if key in nodeconf: + return nodeconf[key] + + lastslash = trail.rfind("/") + if lastslash == -1: + break + elif lastslash == 0 and trail != "/": + trail = "/" + else: + trail = trail[:lastslash] + + return default + def get_serving(self, local, remote, scheme, sproto): """Create and return a Request and Response object.""" req = self.request_class(local, remote, scheme, sproto) req.app = self - for name, toolbox in self.toolboxes.iteritems(): + for name, toolbox in self.toolboxes.items(): req.namespaces[name] = toolbox resp = self.response_class() cherrypy.serving.load(req, resp) - cherrypy.engine.timeout_monitor.acquire() cherrypy.engine.publish('acquire_thread') + cherrypy.engine.publish('before_request') return req, resp @@ -117,7 +134,7 @@ class Application(object): """Release the current serving (request and response).""" req = cherrypy.serving.request - cherrypy.engine.timeout_monitor.release() + cherrypy.engine.publish('after_request') try: req.close() @@ -139,7 +156,7 @@ class Tree(object): """ apps = {} - apps__doc = """ + """ A dict of the form {script name: application}, where "script name" is a string declaring the URI mount point (no trailing slash), and "application" is an instance of cherrypy.Application (or an arbitrary @@ -151,11 +168,14 @@ class Tree(object): def mount(self, root, script_name="", config=None): """Mount a new app from a root object, script_name, and config. - root: an instance of a "controller class" (a collection of page + root + An instance of a "controller class" (a collection of page handler methods) which represents the root of the application. This may also be an Application instance, or None if using a dispatcher other than the default. - script_name: a string containing the "mount point" of the application. + + script_name + A string containing the "mount point" of the application. This should start with a slash, and be the path portion of the URL at which to mount the given root. For example, if root.index() will handle requests to "http://www.example.com:8080/dept/app1/", @@ -163,15 +183,26 @@ class Tree(object): It MUST NOT end in a slash. If the script_name refers to the root of the URI, it MUST be an empty string (not "/"). - config: a file or dict containing application config. + + config + A file or dict containing application config. """ + if script_name is None: + raise TypeError( + "The 'script_name' argument may not be None. Application " + "objects may, however, possess a script_name of None (in " + "order to inpect the WSGI environ for SCRIPT_NAME upon each " + "request). You cannot mount such Applications on this Tree; " + "you must pass them to a WSGI server interface directly.") + # Next line both 1) strips trailing slash and 2) maps "/" -> "". script_name = script_name.rstrip("/") if isinstance(root, Application): app = root if script_name != "" and script_name != app.script_name: - raise ValueError, "Cannot specify a different script name and pass an Application instance to cherrypy.mount" + raise ValueError("Cannot specify a different script name and " + "pass an Application instance to cherrypy.mount") script_name = app.script_name else: app = Application(root, script_name) @@ -201,11 +232,11 @@ class Tree(object): If path is None, cherrypy.request is used. """ - if path is None: try: - path = _http.urljoin(cherrypy.request.script_name, - cherrypy.request.path_info) + request = cherrypy.serving.request + path = httputil.urljoin(request.script_name, + request.path_info) except AttributeError: return None @@ -223,8 +254,11 @@ class Tree(object): # If you're calling this, then you're probably setting SCRIPT_NAME # to '' (some WSGI servers always set SCRIPT_NAME to ''). # Try to look up the app using the full path. - path = _http.urljoin(environ.get('SCRIPT_NAME', ''), - environ.get('PATH_INFO', '')) + env1x = environ + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + env1x = _cpwsgi.downgrade_wsgi_ux_to_1x(environ) + path = httputil.urljoin(env1x.get('SCRIPT_NAME', ''), + env1x.get('PATH_INFO', '')) sn = self.script_name(path or "/") if sn is None: start_response('404 Not Found', []) @@ -234,7 +268,23 @@ class Tree(object): # Correct the SCRIPT_NAME and PATH_INFO environ entries. environ = environ.copy() - environ['SCRIPT_NAME'] = sn - environ['PATH_INFO'] = path[len(sn.rstrip("/")):] + if not py3k: + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + # Python 2/WSGI u.0: all strings MUST be of type unicode + enc = environ[ntou('wsgi.url_encoding')] + environ[ntou('SCRIPT_NAME')] = sn.decode(enc) + environ[ntou('PATH_INFO')] = path[len(sn.rstrip("/")):].decode(enc) + else: + # Python 2/WSGI 1.x: all strings MUST be of type str + environ['SCRIPT_NAME'] = sn + environ['PATH_INFO'] = path[len(sn.rstrip("/")):] + else: + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + # Python 3/WSGI u.0: all strings MUST be full unicode + environ['SCRIPT_NAME'] = sn + environ['PATH_INFO'] = path[len(sn.rstrip("/")):] + else: + # Python 3/WSGI 1.x: all strings MUST be ISO-8859-1 str + environ['SCRIPT_NAME'] = sn.encode('utf-8').decode('ISO-8859-1') + environ['PATH_INFO'] = path[len(sn.rstrip("/")):].encode('utf-8').decode('ISO-8859-1') return app(environ, start_response) - diff --git a/src/cherrypy/_cpwsgi.py b/src/cherrypy/_cpwsgi.py index b7442471f0..80dd274c49 100644 --- a/src/cherrypy/_cpwsgi.py +++ b/src/cherrypy/_cpwsgi.py @@ -1,18 +1,40 @@ -"""WSGI interface (see PEP 333).""" +"""WSGI interface (see PEP 333 and 3333). + +Note that WSGI environ keys and values are 'native strings'; that is, +whatever the type of "" is. For Python 2, that's a byte string; for Python 3, +it's a unicode string. But PEP 3333 says: "even if Python's str type is +actually Unicode "under the hood", the content of native strings must +still be translatable to bytes via the Latin-1 encoding!" +""" -import StringIO as _StringIO import sys as _sys import cherrypy as _cherrypy +from cherrypy._cpcompat import BytesIO, bytestr, ntob, ntou, py3k, unicodestr from cherrypy import _cperror -from cherrypy.lib import http as _http +from cherrypy.lib import httputil + + +def downgrade_wsgi_ux_to_1x(environ): + """Return a new environ dict for WSGI 1.x from the given WSGI u.x environ.""" + env1x = {} + + url_encoding = environ[ntou('wsgi.url_encoding')] + for k, v in list(environ.items()): + if k in [ntou('PATH_INFO'), ntou('SCRIPT_NAME'), ntou('QUERY_STRING')]: + v = v.encode(url_encoding) + elif isinstance(v, unicodestr): + v = v.encode('ISO-8859-1') + env1x[k.encode('ISO-8859-1')] = v + + return env1x class VirtualHost(object): """Select a different WSGI application based on the Host header. This can be useful when running multiple sites within one CP server. - It allows several domains to point to different applications. For example: + It allows several domains to point to different applications. For example:: root = Root() RootApp = cherrypy.Application(root) @@ -25,19 +47,22 @@ class VirtualHost(object): }) cherrypy.tree.graft(vhost) + """ + default = None + """Required. The default WSGI application.""" - default: required. The default WSGI application. + use_x_forwarded_host = True + """If True (the default), any "X-Forwarded-Host" + request header will be used instead of the "Host" header. This + is commonly added by HTTP servers (such as Apache) when proxying.""" - use_x_forwarded_host: if True (the default), any "X-Forwarded-Host" - request header will be used instead of the "Host" header. This - is commonly added by HTTP servers (such as Apache) when proxying. - - domains: a dict of {host header value: application} pairs. - The incoming "Host" request header is looked up in this dict, - and, if a match is found, the corresponding WSGI application - will be called instead of the default. Note that you often need - separate entries for "example.com" and "www.example.com". - In addition, "Host" headers may contain the port number. + domains = {} + """A dict of {host header value: application} pairs. + The incoming "Host" request header is looked up in this dict, + and, if a match is found, the corresponding WSGI application + will be called instead of the default. Note that you often need + separate entries for "example.com" and "www.example.com". + In addition, "Host" headers may contain the port number. """ def __init__(self, default, domains=None, use_x_forwarded_host=True): @@ -56,178 +81,198 @@ class VirtualHost(object): return nextapp(environ, start_response) +class InternalRedirector(object): + """WSGI middleware that handles raised cherrypy.InternalRedirect.""" + + def __init__(self, nextapp, recursive=False): + self.nextapp = nextapp + self.recursive = recursive + + def __call__(self, environ, start_response): + redirections = [] + while True: + environ = environ.copy() + try: + return self.nextapp(environ, start_response) + except _cherrypy.InternalRedirect: + ir = _sys.exc_info()[1] + sn = environ.get('SCRIPT_NAME', '') + path = environ.get('PATH_INFO', '') + qs = environ.get('QUERY_STRING', '') + + # Add the *previous* path_info + qs to redirections. + old_uri = sn + path + if qs: + old_uri += "?" + qs + redirections.append(old_uri) + + if not self.recursive: + # Check to see if the new URI has been redirected to already + new_uri = sn + ir.path + if ir.query_string: + new_uri += "?" + ir.query_string + if new_uri in redirections: + ir.request.close() + raise RuntimeError("InternalRedirector visited the " + "same URL twice: %r" % new_uri) + + # Munge the environment and try again. + environ['REQUEST_METHOD'] = "GET" + environ['PATH_INFO'] = ir.path + environ['QUERY_STRING'] = ir.query_string + environ['wsgi.input'] = BytesIO() + environ['CONTENT_LENGTH'] = "0" + environ['cherrypy.previous_request'] = ir.request + + +class ExceptionTrapper(object): + """WSGI middleware that traps exceptions.""" + + def __init__(self, nextapp, throws=(KeyboardInterrupt, SystemExit)): + self.nextapp = nextapp + self.throws = throws + + def __call__(self, environ, start_response): + return _TrappedResponse(self.nextapp, environ, start_response, self.throws) + + +class _TrappedResponse(object): + + response = iter([]) + + def __init__(self, nextapp, environ, start_response, throws): + self.nextapp = nextapp + self.environ = environ + self.start_response = start_response + self.throws = throws + self.started_response = False + self.response = self.trap(self.nextapp, self.environ, self.start_response) + self.iter_response = iter(self.response) + + def __iter__(self): + self.started_response = True + return self + + if py3k: + def __next__(self): + return self.trap(next, self.iter_response) + else: + def next(self): + return self.trap(self.iter_response.next) + + def close(self): + if hasattr(self.response, 'close'): + self.response.close() + + def trap(self, func, *args, **kwargs): + try: + return func(*args, **kwargs) + except self.throws: + raise + except StopIteration: + raise + except: + tb = _cperror.format_exc() + #print('trapped (started %s):' % self.started_response, tb) + _cherrypy.log(tb, severity=40) + if not _cherrypy.request.show_tracebacks: + tb = "" + s, h, b = _cperror.bare_error(tb) + if py3k: + # What fun. + s = s.decode('ISO-8859-1') + h = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1')) + for k, v in h] + if self.started_response: + # Empty our iterable (so future calls raise StopIteration) + self.iter_response = iter([]) + else: + self.iter_response = iter(b) + + try: + self.start_response(s, h, _sys.exc_info()) + except: + # "The application must not trap any exceptions raised by + # start_response, if it called start_response with exc_info. + # Instead, it should allow such exceptions to propagate + # back to the server or gateway." + # But we still log and call close() to clean up ourselves. + _cherrypy.log(traceback=True, severity=40) + raise + + if self.started_response: + return ntob("").join(b) + else: + return b + # WSGI-to-CP Adapter # class AppResponse(object): + """WSGI response iterable for CherryPy applications.""" - throws = (KeyboardInterrupt, SystemExit) - request = None - - def __init__(self, environ, start_response, cpapp, recursive=False): - self.redirections = [] - self.recursive = recursive - self.environ = environ - self.start_response = start_response + def __init__(self, environ, start_response, cpapp): self.cpapp = cpapp - self.setapp() - - def setapp(self): try: - self.request = self.get_request() - s, h, b = self.get_response() - self.iter_response = iter(b) - self.start_response(s, h) - except self.throws: + if not py3k: + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + environ = downgrade_wsgi_ux_to_1x(environ) + self.environ = environ + self.run() + + r = _cherrypy.serving.response + + outstatus = r.output_status + if not isinstance(outstatus, bytestr): + raise TypeError("response.output_status is not a byte string.") + + outheaders = [] + for k, v in r.header_list: + if not isinstance(k, bytestr): + raise TypeError("response.header_list key %r is not a byte string." % k) + if not isinstance(v, bytestr): + raise TypeError("response.header_list value %r is not a byte string." % v) + outheaders.append((k, v)) + + if py3k: + # According to PEP 3333, when using Python 3, the response status + # and headers must be bytes masquerading as unicode; that is, they + # must be of type "str" but are restricted to code points in the + # "latin-1" set. + outstatus = outstatus.decode('ISO-8859-1') + outheaders = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1')) + for k, v in outheaders] + + self.iter_response = iter(r.body) + self.write = start_response(outstatus, outheaders) + except: self.close() raise - except _cherrypy.InternalRedirect, ir: - self.environ['cherrypy.previous_request'] = _cherrypy.serving.request - self.close() - self.iredirect(ir.path, ir.query_string) - return - except: - if getattr(self.request, "throw_errors", False): - self.close() - raise - - tb = _cperror.format_exc() - _cherrypy.log(tb, severity=40) - if not getattr(self.request, "show_tracebacks", True): - tb = "" - s, h, b = _cperror.bare_error(tb) - self.iter_response = iter(b) - - try: - self.start_response(s, h, _sys.exc_info()) - except: - # "The application must not trap any exceptions raised by - # start_response, if it called start_response with exc_info. - # Instead, it should allow such exceptions to propagate - # back to the server or gateway." - # But we still log and call close() to clean up ourselves. - _cherrypy.log(traceback=True, severity=40) - self.close() - raise - - def iredirect(self, path, query_string): - """Doctor self.environ and perform an internal redirect. - - When cherrypy.InternalRedirect is raised, this method is called. - It rewrites the WSGI environ using the new path and query_string, - and calls a new CherryPy Request object. Because the wsgi.input - stream may have already been consumed by the next application, - the redirected call will always be of HTTP method "GET"; therefore, - any params must be passed in the query_string argument, which is - formed from InternalRedirect.query_string when using that exception. - If you need something more complicated, make and raise your own - exception and write your own AppResponse subclass to trap it. ;) - - It would be a bad idea to redirect after you've already yielded - response content, although an enterprising soul could choose - to abuse this. - """ - env = self.environ - if not self.recursive: - sn = env.get('SCRIPT_NAME', '') - qs = query_string - if qs: - qs = "?" + qs - if sn + path + qs in self.redirections: - raise RuntimeError("InternalRedirector visited the " - "same URL twice: %r + %r + %r" % - (sn, path, qs)) - else: - # Add the *previous* path_info + qs to redirections. - p = env.get('PATH_INFO', '') - qs = env.get('QUERY_STRING', '') - if qs: - qs = "?" + qs - self.redirections.append(sn + p + qs) - - # Munge environment and try again. - env['REQUEST_METHOD'] = "GET" - env['PATH_INFO'] = path - env['QUERY_STRING'] = query_string - env['wsgi.input'] = _StringIO.StringIO() - env['CONTENT_LENGTH'] = "0" - - self.setapp() def __iter__(self): return self - def next(self): - try: - chunk = self.iter_response.next() - # WSGI requires all data to be of type "str". This coercion should - # not take any time at all if chunk is already of type "str". - # If it's unicode, it could be a big performance hit (x ~500). - if not isinstance(chunk, str): - chunk = chunk.encode("ISO-8859-1") - return chunk - except self.throws: - self.close() - raise - except _cherrypy.InternalRedirect, ir: - self.environ['cherrypy.previous_request'] = _cherrypy.serving.request - self.close() - self.iredirect(ir.path, ir.query_string) - except StopIteration: - raise - except: - if getattr(self.request, "throw_errors", False): - self.close() - raise - - tb = _cperror.format_exc() - _cherrypy.log(tb, severity=40) - if not getattr(self.request, "show_tracebacks", True): - tb = "" - s, h, b = _cperror.bare_error(tb) - # Empty our iterable (so future calls raise StopIteration) - self.iter_response = iter([]) - - try: - self.start_response(s, h, _sys.exc_info()) - except: - # "The application must not trap any exceptions raised by - # start_response, if it called start_response with exc_info. - # Instead, it should allow such exceptions to propagate - # back to the server or gateway." - # But we still log and call close() to clean up ourselves. - _cherrypy.log(traceback=True, severity=40) - self.close() - raise - - return "".join(b) + if py3k: + def __next__(self): + return next(self.iter_response) + else: + def next(self): + return self.iter_response.next() def close(self): """Close and de-reference the current request and response. (Core)""" self.cpapp.release_serving() - def get_response(self): - """Run self.request and return its response.""" - meth = self.environ['REQUEST_METHOD'] - path = _http.urljoin(self.environ.get('SCRIPT_NAME', ''), - self.environ.get('PATH_INFO', '')) - qs = self.environ.get('QUERY_STRING', '') - rproto = self.environ.get('SERVER_PROTOCOL') - headers = self.translate_headers(self.environ) - rfile = self.environ['wsgi.input'] - response = self.request.run(meth, path, qs, rproto, headers, rfile) - return response.status, response.header_list, response.body - - def get_request(self): + def run(self): """Create a Request object using environ.""" env = self.environ.get - local = _http.Host('', int(env('SERVER_PORT', 80)), + local = httputil.Host('', int(env('SERVER_PORT', 80)), env('SERVER_NAME', '')) - remote = _http.Host(env('REMOTE_ADDR', ''), - int(env('REMOTE_PORT', -1)), - env('REMOTE_HOST', '')) + remote = httputil.Host(env('REMOTE_ADDR', ''), + int(env('REMOTE_PORT', -1) or -1), + env('REMOTE_HOST', '')) scheme = env('wsgi.url_scheme') sproto = env('ACTUAL_SERVER_PROTOCOL', "HTTP/1.1") request, resp = self.cpapp.get_serving(local, remote, scheme, sproto) @@ -240,7 +285,39 @@ class AppResponse(object): request.multiprocess = self.environ['wsgi.multiprocess'] request.wsgi_environ = self.environ request.prev = env('cherrypy.previous_request', None) - return request + + meth = self.environ['REQUEST_METHOD'] + + path = httputil.urljoin(self.environ.get('SCRIPT_NAME', ''), + self.environ.get('PATH_INFO', '')) + qs = self.environ.get('QUERY_STRING', '') + + if py3k: + # This isn't perfect; if the given PATH_INFO is in the wrong encoding, + # it may fail to match the appropriate config section URI. But meh. + old_enc = self.environ.get('wsgi.url_encoding', 'ISO-8859-1') + new_enc = self.cpapp.find_config(self.environ.get('PATH_INFO', ''), + "request.uri_encoding", 'utf-8') + if new_enc.lower() != old_enc.lower(): + # Even though the path and qs are unicode, the WSGI server is + # required by PEP 3333 to coerce them to ISO-8859-1 masquerading + # as unicode. So we have to encode back to bytes and then decode + # again using the "correct" encoding. + try: + u_path = path.encode(old_enc).decode(new_enc) + u_qs = qs.encode(old_enc).decode(new_enc) + except (UnicodeEncodeError, UnicodeDecodeError): + # Just pass them through without transcoding and hope. + pass + else: + # Only set transcoded values if they both succeed. + path = u_path + qs = u_qs + + rproto = self.environ.get('SERVER_PROTOCOL') + headers = self.translate_headers(self.environ) + rfile = self.environ['wsgi.input'] + request.run(meth, path, qs, rproto, headers, rfile) headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization', 'CONTENT_LENGTH': 'Content-Length', @@ -262,28 +339,29 @@ class AppResponse(object): class CPWSGIApp(object): - """A WSGI application object for a CherryPy Application. + """A WSGI application object for a CherryPy Application.""" - pipeline: a list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a - constructor that takes an initial, positional 'nextapp' argument, - plus optional keyword arguments, and returns a WSGI application - (that takes environ and start_response arguments). The 'name' can - be any you choose, and will correspond to keys in self.config. + pipeline = [('ExceptionTrapper', ExceptionTrapper), + ('InternalRedirector', InternalRedirector), + ] + """A list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a + constructor that takes an initial, positional 'nextapp' argument, + plus optional keyword arguments, and returns a WSGI application + (that takes environ and start_response arguments). The 'name' can + be any you choose, and will correspond to keys in self.config.""" - head: rather than nest all apps in the pipeline on each call, it's only - done the first time, and the result is memoized into self.head. Set - this to None again if you change self.pipeline after calling self. - - config: a dict whose keys match names listed in the pipeline. Each - value is a further dict which will be passed to the corresponding - named WSGI callable (from the pipeline) as keyword arguments. - """ - - pipeline = [] head = None + """Rather than nest all apps in the pipeline on each call, it's only + done the first time, and the result is memoized into self.head. Set + this to None again if you change self.pipeline after calling self.""" + config = {} + """A dict whose keys match names listed in the pipeline. Each + value is a further dict which will be passed to the corresponding + named WSGI callable (from the pipeline) as keyword arguments.""" response_class = AppResponse + """The class to instantiate and return as the next app in the WSGI chain.""" def __init__(self, cpapp, pipeline=None): self.cpapp = cpapp diff --git a/src/cherrypy/_cpwsgi_server.py b/src/cherrypy/_cpwsgi_server.py index ac8bfaa2ec..21af51347c 100644 --- a/src/cherrypy/_cpwsgi_server.py +++ b/src/cherrypy/_cpwsgi_server.py @@ -1,25 +1,12 @@ """WSGI server interface (see PEP 333). This adds some CP-specific bits to the framework-agnostic wsgiserver package. """ +import sys import cherrypy from cherrypy import wsgiserver -class CPHTTPRequest(wsgiserver.HTTPRequest): - - def __init__(self, sendall, environ, wsgi_app): - s = cherrypy.server - self.max_request_header_size = s.max_request_header_size or 0 - self.max_request_body_size = s.max_request_body_size or 0 - wsgiserver.HTTPRequest.__init__(self, sendall, environ, wsgi_app) - - -class CPHTTPConnection(wsgiserver.HTTPConnection): - - RequestHandlerClass = CPHTTPRequest - - class CPWSGIServer(wsgiserver.CherryPyWSGIServer): """Wrapper for wsgiserver.CherryPyWSGIServer. @@ -29,27 +16,48 @@ class CPWSGIServer(wsgiserver.CherryPyWSGIServer): and apply some attributes from config -> cherrypy.server -> wsgiserver. """ - ConnectionClass = CPHTTPConnection - - def __init__(self): - server = cherrypy.server - sockFile = server.socket_file - if sockFile: - bind_addr = sockFile - else: - bind_addr = (server.socket_host, server.socket_port) + def __init__(self, server_adapter=cherrypy.server): + self.server_adapter = server_adapter + self.max_request_header_size = self.server_adapter.max_request_header_size or 0 + self.max_request_body_size = self.server_adapter.max_request_body_size or 0 + server_name = (self.server_adapter.socket_host or + self.server_adapter.socket_file or + None) + + self.wsgi_version = self.server_adapter.wsgi_version s = wsgiserver.CherryPyWSGIServer - s.__init__(self, bind_addr, cherrypy.tree, - server.thread_pool, - server.socket_host, - max = server.thread_pool_max, - request_queue_size = server.socket_queue_size, - timeout = server.socket_timeout, - shutdown_timeout = server.shutdown_timeout, + s.__init__(self, server_adapter.bind_addr, cherrypy.tree, + self.server_adapter.thread_pool, + server_name, + max = self.server_adapter.thread_pool_max, + request_queue_size = self.server_adapter.socket_queue_size, + timeout = self.server_adapter.socket_timeout, + shutdown_timeout = self.server_adapter.shutdown_timeout, ) - self.protocol = server.protocol_version - self.nodelay = server.nodelay - self.ssl_certificate = server.ssl_certificate - self.ssl_private_key = server.ssl_private_key + self.protocol = self.server_adapter.protocol_version + self.nodelay = self.server_adapter.nodelay + + if sys.version_info >= (3, 0): + ssl_module = self.server_adapter.ssl_module or 'builtin' + else: + ssl_module = self.server_adapter.ssl_module or 'pyopenssl' + if self.server_adapter.ssl_context: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) + self.ssl_adapter.context = self.server_adapter.ssl_context + elif self.server_adapter.ssl_certificate: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) + + self.stats['Enabled'] = getattr(self.server_adapter, 'statistics', False) + + def error_log(self, msg="", level=20, traceback=False): + cherrypy.engine.log(msg, level, traceback) diff --git a/src/cherrypy/cherryd b/src/cherrypy/cherryd old mode 100644 new mode 100755 index ef1bd7a3d4..adb2a02e0e --- a/src/cherrypy/cherryd +++ b/src/cherrypy/cherryd @@ -5,17 +5,23 @@ import sys import cherrypy from cherrypy.process import plugins, servers - +from cherrypy import Application def start(configfiles=None, daemonize=False, environment=None, - fastcgi=False, scgi=False, pidfile=None, imports=None): + fastcgi=False, scgi=False, pidfile=None, imports=None, + cgi=False): """Subscribe all engine plugins and start the engine.""" sys.path = [''] + sys.path for i in imports or []: - exec "import %s" % i + exec("import %s" % i) for c in configfiles or []: cherrypy.config.update(c) + # If there's only one app mounted, merge config into it. + if len(cherrypy.tree.apps) == 1: + for app in cherrypy.tree.apps.values(): + if isinstance(app, Application): + app.merge(c) engine = cherrypy.engine @@ -36,28 +42,27 @@ def start(configfiles=None, daemonize=False, environment=None, if hasattr(engine, "console_control_handler"): engine.console_control_handler.subscribe() - if fastcgi and scgi: - # fastcgi and scgi aren't allowed together. - cherrypy.log.error("fastcgi and scgi aren't allowed together.", 'ENGINE') + if (fastcgi and (scgi or cgi)) or (scgi and cgi): + cherrypy.log.error("You may only specify one of the cgi, fastcgi, and " + "scgi options.", 'ENGINE') sys.exit(1) - elif fastcgi or scgi: - # Turn off autoreload when using fastcgi or scgi. + elif fastcgi or scgi or cgi: + # Turn off autoreload when using *cgi. cherrypy.config.update({'engine.autoreload_on': False}) # Turn off the default HTTP server (which is subscribed by default). cherrypy.server.unsubscribe() - sock_file = cherrypy.config.get('server.socket_file', None) - if sock_file: - bindAddress = sock_file - else: - flup_port = cherrypy.config.get('server.socket_port', 4000) - flup_bindaddr = cherrypy.config.get('server.socket_host', '0.0.0.0') - bindAddress = (flup_bindaddr, flup_port) + addr = cherrypy.server.bind_addr if fastcgi: - f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=bindAddress) + f = servers.FlupFCGIServer(application=cherrypy.tree, + bindAddress=addr) + elif scgi: + f = servers.FlupSCGIServer(application=cherrypy.tree, + bindAddress=addr) else: - f = servers.FlupSCGIServer(application=cherrypy.tree, bindAddress=bindAddress) - s = servers.ServerAdapter(engine, httpserver=f, bind_addr=bindAddress) + f = servers.FlupCGIServer(application=cherrypy.tree, + bindAddress=addr) + s = servers.ServerAdapter(engine, httpserver=f, bind_addr=addr) s.subscribe() # Always start the engine; this will start all other services @@ -84,13 +89,21 @@ if __name__ == '__main__': help="start a fastcgi server instead of the default HTTP server") p.add_option('-s', action="store_true", dest='scgi', help="start a scgi server instead of the default HTTP server") + p.add_option('-x', action="store_true", dest='cgi', + help="start a cgi server instead of the default HTTP server") p.add_option('-i', '--import', action="append", dest='imports', help="specify modules to import") p.add_option('-p', '--pidfile', dest='pidfile', default=None, help="store the process id in the given file") + p.add_option('-P', '--Path', action="append", dest='Path', + help="add the given paths to sys.path") options, args = p.parse_args() + if options.Path: + for p in options.Path: + sys.path.insert(0, p) + start(options.config, options.daemonize, - options.environment, options.fastcgi, options.scgi, options.pidfile, - options.imports) + options.environment, options.fastcgi, options.scgi, + options.pidfile, options.imports, options.cgi) diff --git a/src/cherrypy/lib/__init__.py b/src/cherrypy/lib/__init__.py index 47be2eddd0..3fc0ec58df 100644 --- a/src/cherrypy/lib/__init__.py +++ b/src/cherrypy/lib/__init__.py @@ -1,147 +1,27 @@ """CherryPy Library""" -import sys as _sys +# Deprecated in CherryPy 3.2 -- remove in CherryPy 3.3 +from cherrypy.lib.reprconf import unrepr, modules, attributes - -def modules(modulePath): - """Load a module and retrieve a reference to that module.""" - try: - mod = _sys.modules[modulePath] - if mod is None: - raise KeyError() - except KeyError: - # The last [''] is important. - mod = __import__(modulePath, globals(), locals(), ['']) - return mod - -def attributes(full_attribute_name): - """Load a module and retrieve an attribute of that module.""" - - # Parse out the path, module, and attribute - last_dot = full_attribute_name.rfind(u".") - attr_name = full_attribute_name[last_dot + 1:] - mod_path = full_attribute_name[:last_dot] - - mod = modules(mod_path) - # Let an AttributeError propagate outward. - try: - attr = getattr(mod, attr_name) - except AttributeError: - raise AttributeError("'%s' object has no attribute '%s'" - % (mod_path, attr_name)) - - # Return a reference to the attribute. - return attr - - -# public domain "unrepr" implementation, found on the web and then improved. - -class _Builder: - - def build(self, o): - m = getattr(self, 'build_' + o.__class__.__name__, None) - if m is None: - raise TypeError("unrepr does not recognize %s" % - repr(o.__class__.__name__)) - return m(o) - - def build_Subscript(self, o): - expr, flags, subs = o.getChildren() - expr = self.build(expr) - subs = self.build(subs) - return expr[subs] - - def build_CallFunc(self, o): - children = map(self.build, o.getChildren()) - callee = children.pop(0) - kwargs = children.pop() or {} - starargs = children.pop() or () - args = tuple(children) + tuple(starargs) - return callee(*args, **kwargs) - - def build_List(self, o): - return map(self.build, o.getChildren()) - - def build_Const(self, o): - return o.value - - def build_Dict(self, o): - d = {} - i = iter(map(self.build, o.getChildren())) - for el in i: - d[el] = i.next() - return d - - def build_Tuple(self, o): - return tuple(self.build_List(o)) - - def build_Name(self, o): - if o.name == 'None': - return None - if o.name == 'True': - return True - if o.name == 'False': - return False - - # See if the Name is a package or module. If it is, import it. - try: - return modules(o.name) - except ImportError: - pass - - # See if the Name is in __builtin__. - try: - import __builtin__ - return getattr(__builtin__, o.name) - except AttributeError: - pass - - raise TypeError("unrepr could not resolve the name %s" % repr(o.name)) - - def build_Add(self, o): - left, right = map(self.build, o.getChildren()) - return left + right - - def build_Getattr(self, o): - parent = self.build(o.expr) - return getattr(parent, o.attrname) - - def build_NoneType(self, o): - return None - - def build_UnarySub(self, o): - return -self.build(o.getChildren()[0]) - - def build_UnaryAdd(self, o): - return self.build(o.getChildren()[0]) - - -def unrepr(s): - """Return a Python object compiled from a string.""" - if not s: - return s - - try: - import compiler - except ImportError: - # Fallback to eval when compiler package is not available, - # e.g. IronPython 1.0. - return eval(s) - - p = compiler.parse("__tempvalue__ = " + s) - obj = p.getChildren()[1].getChildren()[0].getChildren()[1] - - return _Builder().build(obj) - - -def file_generator(input, chunkSize=65536): +class file_generator(object): """Yield the given input (a file object) in chunks (default 64k). (Core)""" - chunk = input.read(chunkSize) - while chunk: - yield chunk - chunk = input.read(chunkSize) - input.close() - + + def __init__(self, input, chunkSize=65536): + self.input = input + self.chunkSize = chunkSize + + def __iter__(self): + return self + + def __next__(self): + chunk = self.input.read(self.chunkSize) + if chunk: + return chunk + else: + if hasattr(self.input, 'close'): + self.input.close() + raise StopIteration() + next = __next__ def file_generator_limited(fileobj, count, chunk_size=65536): """Yield the given file object in chunks, stopping after `count` @@ -156,3 +36,10 @@ def file_generator_limited(fileobj, count, chunk_size=65536): remaining -= chunklen yield chunk +def set_vary_header(response, header_name): + "Add a Vary header to a response" + varies = response.headers.get("Vary", "") + varies = [x.strip() for x in varies.split(",") if x.strip()] + if header_name not in varies: + varies.append(header_name) + response.headers['Vary'] = ", ".join(varies) diff --git a/src/cherrypy/lib/auth.py b/src/cherrypy/lib/auth.py index 11a96735e6..7d2f6dc2fb 100644 --- a/src/cherrypy/lib/auth.py +++ b/src/cherrypy/lib/auth.py @@ -4,22 +4,23 @@ from cherrypy.lib import httpauth def check_auth(users, encrypt=None, realm=None): """If an authorization header contains credentials, return True, else False.""" - if 'authorization' in cherrypy.request.headers: + request = cherrypy.serving.request + if 'authorization' in request.headers: # make sure the provided credentials are correctly set - ah = httpauth.parseAuthorization(cherrypy.request.headers['authorization']) + ah = httpauth.parseAuthorization(request.headers['authorization']) if ah is None: raise cherrypy.HTTPError(400, 'Bad Request') if not encrypt: encrypt = httpauth.DIGEST_AUTH_ENCODERS[httpauth.MD5] - if callable(users): + if hasattr(users, '__call__'): try: # backward compatibility users = users() # expect it to return a dictionary - + if not isinstance(users, dict): - raise ValueError, "Authentication users must be a dictionary" + raise ValueError("Authentication users must be a dictionary") # fetch the user password password = users.get(ah["username"], None) @@ -28,48 +29,59 @@ def check_auth(users, encrypt=None, realm=None): password = users(ah["username"]) else: if not isinstance(users, dict): - raise ValueError, "Authentication users must be a dictionary" + raise ValueError("Authentication users must be a dictionary") # fetch the user password password = users.get(ah["username"], None) # validate the authorization by re-computing it here # and compare it with what the user-agent provided - if httpauth.checkResponse(ah, password, method=cherrypy.request.method, + if httpauth.checkResponse(ah, password, method=request.method, encrypt=encrypt, realm=realm): - cherrypy.request.login = ah["username"] + request.login = ah["username"] return True - - cherrypy.request.login = False + + request.login = False return False -def basic_auth(realm, users, encrypt=None): +def basic_auth(realm, users, encrypt=None, debug=False): """If auth fails, raise 401 with a basic authentication header. - realm: a string containing the authentication realm. - users: a dict of the form: {username: password} or a callable returning a dict. - encrypt: callable used to encrypt the password returned from the user-agent. - if None it defaults to a md5 encryption. + realm + A string containing the authentication realm. + + users + A dict of the form: {username: password} or a callable returning a dict. + + encrypt + callable used to encrypt the password returned from the user-agent. + if None it defaults to a md5 encryption. + """ if check_auth(users, encrypt): + if debug: + cherrypy.log('Auth successful', 'TOOLS.BASIC_AUTH') return # inform the user-agent this path is protected - cherrypy.response.headers['www-authenticate'] = httpauth.basicAuth(realm) + cherrypy.serving.response.headers['www-authenticate'] = httpauth.basicAuth(realm) - raise cherrypy.HTTPError(401, "You are not authorized to access that resource") + raise cherrypy.HTTPError(401, "You are not authorized to access that resource") -def digest_auth(realm, users): +def digest_auth(realm, users, debug=False): """If auth fails, raise 401 with a digest authentication header. - realm: a string containing the authentication realm. - users: a dict of the form: {username: password} or a callable returning a dict. + realm + A string containing the authentication realm. + users + A dict of the form: {username: password} or a callable returning a dict. """ if check_auth(users, realm=realm): + if debug: + cherrypy.log('Auth successful', 'TOOLS.DIGEST_AUTH') return # inform the user-agent this path is protected - cherrypy.response.headers['www-authenticate'] = httpauth.digestAuth(realm) + cherrypy.serving.response.headers['www-authenticate'] = httpauth.digestAuth(realm) - raise cherrypy.HTTPError(401, "You are not authorized to access that resource") - + raise cherrypy.HTTPError(401, "You are not authorized to access that resource") diff --git a/src/cherrypy/lib/auth_basic.py b/src/cherrypy/lib/auth_basic.py new file mode 100644 index 0000000000..2c05e01311 --- /dev/null +++ b/src/cherrypy/lib/auth_basic.py @@ -0,0 +1,87 @@ +# This file is part of CherryPy +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:expandtab:fileencoding=utf-8 + +__doc__ = """This module provides a CherryPy 3.x tool which implements +the server-side of HTTP Basic Access Authentication, as described in :rfc:`2617`. + +Example usage, using the built-in checkpassword_dict function which uses a dict +as the credentials store:: + + userpassdict = {'bird' : 'bebop', 'ornette' : 'wayout'} + checkpassword = cherrypy.lib.auth_basic.checkpassword_dict(userpassdict) + basic_auth = {'tools.auth_basic.on': True, + 'tools.auth_basic.realm': 'earth', + 'tools.auth_basic.checkpassword': checkpassword, + } + app_config = { '/' : basic_auth } + +""" + +__author__ = 'visteya' +__date__ = 'April 2009' + +import binascii +from cherrypy._cpcompat import base64_decode +import cherrypy + + +def checkpassword_dict(user_password_dict): + """Returns a checkpassword function which checks credentials + against a dictionary of the form: {username : password}. + + If you want a simple dictionary-based authentication scheme, use + checkpassword_dict(my_credentials_dict) as the value for the + checkpassword argument to basic_auth(). + """ + def checkpassword(realm, user, password): + p = user_password_dict.get(user) + return p and p == password or False + + return checkpassword + + +def basic_auth(realm, checkpassword, debug=False): + """A CherryPy tool which hooks at before_handler to perform + HTTP Basic Access Authentication, as specified in :rfc:`2617`. + + If the request has an 'authorization' header with a 'Basic' scheme, this + tool attempts to authenticate the credentials supplied in that header. If + the request has no 'authorization' header, or if it does but the scheme is + not 'Basic', or if authentication fails, the tool sends a 401 response with + a 'WWW-Authenticate' Basic header. + + realm + A string containing the authentication realm. + + checkpassword + A callable which checks the authentication credentials. + Its signature is checkpassword(realm, username, password). where + username and password are the values obtained from the request's + 'authorization' header. If authentication succeeds, checkpassword + returns True, else it returns False. + + """ + + if '"' in realm: + raise ValueError('Realm cannot contain the " (quote) character.') + request = cherrypy.serving.request + + auth_header = request.headers.get('authorization') + if auth_header is not None: + try: + scheme, params = auth_header.split(' ', 1) + if scheme.lower() == 'basic': + username, password = base64_decode(params).split(':', 1) + if checkpassword(realm, username, password): + if debug: + cherrypy.log('Auth succeeded', 'TOOLS.AUTH_BASIC') + request.login = username + return # successful authentication + except (ValueError, binascii.Error): # split() error, base64.decodestring() error + raise cherrypy.HTTPError(400, 'Bad Request') + + # Respond with 401 status and a WWW-Authenticate header + cherrypy.serving.response.headers['www-authenticate'] = 'Basic realm="%s"' % realm + raise cherrypy.HTTPError(401, "You are not authorized to access that resource") + diff --git a/src/cherrypy/lib/auth_digest.py b/src/cherrypy/lib/auth_digest.py new file mode 100644 index 0000000000..67578e0015 --- /dev/null +++ b/src/cherrypy/lib/auth_digest.py @@ -0,0 +1,365 @@ +# This file is part of CherryPy +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:expandtab:fileencoding=utf-8 + +__doc__ = """An implementation of the server-side of HTTP Digest Access +Authentication, which is described in :rfc:`2617`. + +Example usage, using the built-in get_ha1_dict_plain function which uses a dict +of plaintext passwords as the credentials store:: + + userpassdict = {'alice' : '4x5istwelve'} + get_ha1 = cherrypy.lib.auth_digest.get_ha1_dict_plain(userpassdict) + digest_auth = {'tools.auth_digest.on': True, + 'tools.auth_digest.realm': 'wonderland', + 'tools.auth_digest.get_ha1': get_ha1, + 'tools.auth_digest.key': 'a565c27146791cfb', + } + app_config = { '/' : digest_auth } +""" + +__author__ = 'visteya' +__date__ = 'April 2009' + + +import time +from cherrypy._cpcompat import parse_http_list, parse_keqv_list + +import cherrypy +from cherrypy._cpcompat import md5, ntob +md5_hex = lambda s: md5(ntob(s)).hexdigest() + +qop_auth = 'auth' +qop_auth_int = 'auth-int' +valid_qops = (qop_auth, qop_auth_int) + +valid_algorithms = ('MD5', 'MD5-sess') + + +def TRACE(msg): + cherrypy.log(msg, context='TOOLS.AUTH_DIGEST') + +# Three helper functions for users of the tool, providing three variants +# of get_ha1() functions for three different kinds of credential stores. +def get_ha1_dict_plain(user_password_dict): + """Returns a get_ha1 function which obtains a plaintext password from a + dictionary of the form: {username : password}. + + If you want a simple dictionary-based authentication scheme, with plaintext + passwords, use get_ha1_dict_plain(my_userpass_dict) as the value for the + get_ha1 argument to digest_auth(). + """ + def get_ha1(realm, username): + password = user_password_dict.get(username) + if password: + return md5_hex('%s:%s:%s' % (username, realm, password)) + return None + + return get_ha1 + +def get_ha1_dict(user_ha1_dict): + """Returns a get_ha1 function which obtains a HA1 password hash from a + dictionary of the form: {username : HA1}. + + If you want a dictionary-based authentication scheme, but with + pre-computed HA1 hashes instead of plain-text passwords, use + get_ha1_dict(my_userha1_dict) as the value for the get_ha1 + argument to digest_auth(). + """ + def get_ha1(realm, username): + return user_ha1_dict.get(user) + + return get_ha1 + +def get_ha1_file_htdigest(filename): + """Returns a get_ha1 function which obtains a HA1 password hash from a + flat file with lines of the same format as that produced by the Apache + htdigest utility. For example, for realm 'wonderland', username 'alice', + and password '4x5istwelve', the htdigest line would be:: + + alice:wonderland:3238cdfe91a8b2ed8e39646921a02d4c + + If you want to use an Apache htdigest file as the credentials store, + then use get_ha1_file_htdigest(my_htdigest_file) as the value for the + get_ha1 argument to digest_auth(). It is recommended that the filename + argument be an absolute path, to avoid problems. + """ + def get_ha1(realm, username): + result = None + f = open(filename, 'r') + for line in f: + u, r, ha1 = line.rstrip().split(':') + if u == username and r == realm: + result = ha1 + break + f.close() + return result + + return get_ha1 + + +def synthesize_nonce(s, key, timestamp=None): + """Synthesize a nonce value which resists spoofing and can be checked for staleness. + Returns a string suitable as the value for 'nonce' in the www-authenticate header. + + s + A string related to the resource, such as the hostname of the server. + + key + A secret string known only to the server. + + timestamp + An integer seconds-since-the-epoch timestamp + + """ + if timestamp is None: + timestamp = int(time.time()) + h = md5_hex('%s:%s:%s' % (timestamp, s, key)) + nonce = '%s:%s' % (timestamp, h) + return nonce + + +def H(s): + """The hash function H""" + return md5_hex(s) + + +class HttpDigestAuthorization (object): + """Class to parse a Digest Authorization header and perform re-calculation + of the digest. + """ + + def errmsg(self, s): + return 'Digest Authorization header: %s' % s + + def __init__(self, auth_header, http_method, debug=False): + self.http_method = http_method + self.debug = debug + scheme, params = auth_header.split(" ", 1) + self.scheme = scheme.lower() + if self.scheme != 'digest': + raise ValueError('Authorization scheme is not "Digest"') + + self.auth_header = auth_header + + # make a dict of the params + items = parse_http_list(params) + paramsd = parse_keqv_list(items) + + self.realm = paramsd.get('realm') + self.username = paramsd.get('username') + self.nonce = paramsd.get('nonce') + self.uri = paramsd.get('uri') + self.method = paramsd.get('method') + self.response = paramsd.get('response') # the response digest + self.algorithm = paramsd.get('algorithm', 'MD5') + self.cnonce = paramsd.get('cnonce') + self.opaque = paramsd.get('opaque') + self.qop = paramsd.get('qop') # qop + self.nc = paramsd.get('nc') # nonce count + + # perform some correctness checks + if self.algorithm not in valid_algorithms: + raise ValueError(self.errmsg("Unsupported value for algorithm: '%s'" % self.algorithm)) + + has_reqd = self.username and \ + self.realm and \ + self.nonce and \ + self.uri and \ + self.response + if not has_reqd: + raise ValueError(self.errmsg("Not all required parameters are present.")) + + if self.qop: + if self.qop not in valid_qops: + raise ValueError(self.errmsg("Unsupported value for qop: '%s'" % self.qop)) + if not (self.cnonce and self.nc): + raise ValueError(self.errmsg("If qop is sent then cnonce and nc MUST be present")) + else: + if self.cnonce or self.nc: + raise ValueError(self.errmsg("If qop is not sent, neither cnonce nor nc can be present")) + + + def __str__(self): + return 'authorization : %s' % self.auth_header + + def validate_nonce(self, s, key): + """Validate the nonce. + Returns True if nonce was generated by synthesize_nonce() and the timestamp + is not spoofed, else returns False. + + s + A string related to the resource, such as the hostname of the server. + + key + A secret string known only to the server. + + Both s and key must be the same values which were used to synthesize the nonce + we are trying to validate. + """ + try: + timestamp, hashpart = self.nonce.split(':', 1) + s_timestamp, s_hashpart = synthesize_nonce(s, key, timestamp).split(':', 1) + is_valid = s_hashpart == hashpart + if self.debug: + TRACE('validate_nonce: %s' % is_valid) + return is_valid + except ValueError: # split() error + pass + return False + + + def is_nonce_stale(self, max_age_seconds=600): + """Returns True if a validated nonce is stale. The nonce contains a + timestamp in plaintext and also a secure hash of the timestamp. You should + first validate the nonce to ensure the plaintext timestamp is not spoofed. + """ + try: + timestamp, hashpart = self.nonce.split(':', 1) + if int(timestamp) + max_age_seconds > int(time.time()): + return False + except ValueError: # int() error + pass + if self.debug: + TRACE("nonce is stale") + return True + + + def HA2(self, entity_body=''): + """Returns the H(A2) string. See :rfc:`2617` section 3.2.2.3.""" + # RFC 2617 3.2.2.3 + # If the "qop" directive's value is "auth" or is unspecified, then A2 is: + # A2 = method ":" digest-uri-value + # + # If the "qop" value is "auth-int", then A2 is: + # A2 = method ":" digest-uri-value ":" H(entity-body) + if self.qop is None or self.qop == "auth": + a2 = '%s:%s' % (self.http_method, self.uri) + elif self.qop == "auth-int": + a2 = "%s:%s:%s" % (self.http_method, self.uri, H(entity_body)) + else: + # in theory, this should never happen, since I validate qop in __init__() + raise ValueError(self.errmsg("Unrecognized value for qop!")) + return H(a2) + + + def request_digest(self, ha1, entity_body=''): + """Calculates the Request-Digest. See :rfc:`2617` section 3.2.2.1. + + ha1 + The HA1 string obtained from the credentials store. + + entity_body + If 'qop' is set to 'auth-int', then A2 includes a hash + of the "entity body". The entity body is the part of the + message which follows the HTTP headers. See :rfc:`2617` section + 4.3. This refers to the entity the user agent sent in the request which + has the Authorization header. Typically GET requests don't have an entity, + and POST requests do. + + """ + ha2 = self.HA2(entity_body) + # Request-Digest -- RFC 2617 3.2.2.1 + if self.qop: + req = "%s:%s:%s:%s:%s" % (self.nonce, self.nc, self.cnonce, self.qop, ha2) + else: + req = "%s:%s" % (self.nonce, ha2) + + # RFC 2617 3.2.2.2 + # + # If the "algorithm" directive's value is "MD5" or is unspecified, then A1 is: + # A1 = unq(username-value) ":" unq(realm-value) ":" passwd + # + # If the "algorithm" directive's value is "MD5-sess", then A1 is + # calculated only once - on the first request by the client following + # receipt of a WWW-Authenticate challenge from the server. + # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd ) + # ":" unq(nonce-value) ":" unq(cnonce-value) + if self.algorithm == 'MD5-sess': + ha1 = H('%s:%s:%s' % (ha1, self.nonce, self.cnonce)) + + digest = H('%s:%s' % (ha1, req)) + return digest + + + +def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth, stale=False): + """Constructs a WWW-Authenticate header for Digest authentication.""" + if qop not in valid_qops: + raise ValueError("Unsupported value for qop: '%s'" % qop) + if algorithm not in valid_algorithms: + raise ValueError("Unsupported value for algorithm: '%s'" % algorithm) + + if nonce is None: + nonce = synthesize_nonce(realm, key) + s = 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % ( + realm, nonce, algorithm, qop) + if stale: + s += ', stale="true"' + return s + + +def digest_auth(realm, get_ha1, key, debug=False): + """A CherryPy tool which hooks at before_handler to perform + HTTP Digest Access Authentication, as specified in :rfc:`2617`. + + If the request has an 'authorization' header with a 'Digest' scheme, this + tool authenticates the credentials supplied in that header. If + the request has no 'authorization' header, or if it does but the scheme is + not "Digest", or if authentication fails, the tool sends a 401 response with + a 'WWW-Authenticate' Digest header. + + realm + A string containing the authentication realm. + + get_ha1 + A callable which looks up a username in a credentials store + and returns the HA1 string, which is defined in the RFC to be + MD5(username : realm : password). The function's signature is: + ``get_ha1(realm, username)`` + where username is obtained from the request's 'authorization' header. + If username is not found in the credentials store, get_ha1() returns + None. + + key + A secret string known only to the server, used in the synthesis of nonces. + + """ + request = cherrypy.serving.request + + auth_header = request.headers.get('authorization') + nonce_is_stale = False + if auth_header is not None: + try: + auth = HttpDigestAuthorization(auth_header, request.method, debug=debug) + except ValueError: + raise cherrypy.HTTPError(400, "The Authorization header could not be parsed.") + + if debug: + TRACE(str(auth)) + + if auth.validate_nonce(realm, key): + ha1 = get_ha1(realm, auth.username) + if ha1 is not None: + # note that for request.body to be available we need to hook in at + # before_handler, not on_start_resource like 3.1.x digest_auth does. + digest = auth.request_digest(ha1, entity_body=request.body) + if digest == auth.response: # authenticated + if debug: + TRACE("digest matches auth.response") + # Now check if nonce is stale. + # The choice of ten minutes' lifetime for nonce is somewhat arbitrary + nonce_is_stale = auth.is_nonce_stale(max_age_seconds=600) + if not nonce_is_stale: + request.login = auth.username + if debug: + TRACE("authentication of %s successful" % auth.username) + return + + # Respond with 401 status and a WWW-Authenticate header + header = www_authenticate(realm, key, stale=nonce_is_stale) + if debug: + TRACE(header) + cherrypy.serving.response.headers['WWW-Authenticate'] = header + raise cherrypy.HTTPError(401, "You are not authorized to access that resource") + diff --git a/src/cherrypy/lib/caching.py b/src/cherrypy/lib/caching.py index cc6b79c605..435b9dc184 100644 --- a/src/cherrypy/lib/caching.py +++ b/src/cherrypy/lib/caching.py @@ -1,32 +1,179 @@ +""" +CherryPy implements a simple caching system as a pluggable Tool. This tool tries +to be an (in-process) HTTP/1.1-compliant cache. It's not quite there yet, but +it's probably good enough for most sites. + +In general, GET responses are cached (along with selecting headers) and, if +another request arrives for the same resource, the caching Tool will return 304 +Not Modified if possible, or serve the cached response otherwise. It also sets +request.cached to True if serving a cached representation, and sets +request.cacheable to False (so it doesn't get cached again). + +If POST, PUT, or DELETE requests are made for a cached resource, they invalidate +(delete) any cached response. + +Usage +===== + +Configuration file example:: + + [/] + tools.caching.on = True + tools.caching.delay = 3600 + +You may use a class other than the default +:class:`MemoryCache` by supplying the config +entry ``cache_class``; supply the full dotted name of the replacement class +as the config value. It must implement the basic methods ``get``, ``put``, +``delete``, and ``clear``. + +You may set any attribute, including overriding methods, on the cache +instance by providing them in config. The above sets the +:attr:`delay` attribute, for example. +""" + import datetime +import sys import threading import time import cherrypy -from cherrypy.lib import cptools, http +from cherrypy.lib import cptools, httputil +from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted -class MemoryCache: +class Cache(object): + """Base class for Cache implementations.""" + + def get(self): + """Return the current variant if in the cache, else None.""" + raise NotImplemented + + def put(self, obj, size): + """Store the current variant in the cache.""" + raise NotImplemented + + def delete(self): + """Remove ALL cached variants of the current resource.""" + raise NotImplemented + + def clear(self): + """Reset the cache to its initial, empty state.""" + raise NotImplemented + + + +# ------------------------------- Memory Cache ------------------------------- # + + +class AntiStampedeCache(dict): + """A storage system for cached items which reduces stampede collisions.""" + + def wait(self, key, timeout=5, debug=False): + """Return the cached value for the given key, or None. + + If timeout is not None, and the value is already + being calculated by another thread, wait until the given timeout has + elapsed. If the value is available before the timeout expires, it is + returned. If not, None is returned, and a sentinel placed in the cache + to signal other threads to wait. + + If timeout is None, no waiting is performed nor sentinels used. + """ + value = self.get(key) + if isinstance(value, threading._Event): + if timeout is None: + # Ignore the other thread and recalc it ourselves. + if debug: + cherrypy.log('No timeout', 'TOOLS.CACHING') + return None + + # Wait until it's done or times out. + if debug: + cherrypy.log('Waiting up to %s seconds' % timeout, 'TOOLS.CACHING') + value.wait(timeout) + if value.result is not None: + # The other thread finished its calculation. Use it. + if debug: + cherrypy.log('Result!', 'TOOLS.CACHING') + return value.result + # Timed out. Stick an Event in the slot so other threads wait + # on this one to finish calculating the value. + if debug: + cherrypy.log('Timed out', 'TOOLS.CACHING') + e = threading.Event() + e.result = None + dict.__setitem__(self, key, e) + + return None + elif value is None: + # Stick an Event in the slot so other threads wait + # on this one to finish calculating the value. + if debug: + cherrypy.log('Timed out', 'TOOLS.CACHING') + e = threading.Event() + e.result = None + dict.__setitem__(self, key, e) + return value + + def __setitem__(self, key, value): + """Set the cached value for the given key.""" + existing = self.get(key) + dict.__setitem__(self, key, value) + if isinstance(existing, threading._Event): + # Set Event.result so other threads waiting on it have + # immediate access without needing to poll the cache again. + existing.result = value + existing.set() + + +class MemoryCache(Cache): + """An in-memory cache for varying response content. + + Each key in self.store is a URI, and each value is an AntiStampedeCache. + The response for any given URI may vary based on the values of + "selecting request headers"; that is, those named in the Vary + response header. We assume the list of header names to be constant + for each URI throughout the lifetime of the application, and store + that list in ``self.store[uri].selecting_headers``. + + The items contained in ``self.store[uri]`` have keys which are tuples of + request header values (in the same order as the names in its + selecting_headers), and values which are the actual responses. + """ maxobjects = 1000 + """The maximum number of cached objects; defaults to 1000.""" + maxobj_size = 100000 + """The maximum size of each cached object in bytes; defaults to 100 KB.""" + maxsize = 10000000 + """The maximum size of the entire cache in bytes; defaults to 10 MB.""" + delay = 600 + """Seconds until the cached content expires; defaults to 600 (10 minutes).""" + + antistampede_timeout = 5 + """Seconds to wait for other threads to release a cache lock.""" + + expire_freq = 0.1 + """Seconds to sleep between cache expiration sweeps.""" + + debug = False def __init__(self): self.clear() + + # Run self.expire_cache in a separate daemon thread. t = threading.Thread(target=self.expire_cache, name='expire_cache') self.expiration_thread = t - if hasattr(threading.Thread, "daemon"): - # Python 2.6+ - t.daemon = True - else: - t.setDaemon(True) + set_daemon(t, True) t.start() def clear(self): """Reset the cache to its initial, empty state.""" - self.cache = {} + self.store = {} self.expirations = {} self.tot_puts = 0 self.tot_gets = 0 @@ -35,61 +182,88 @@ class MemoryCache: self.tot_non_modified = 0 self.cursize = 0 - def key(self): - return cherrypy.url(qs=cherrypy.request.query_string) - def expire_cache(self): - # expire_cache runs in a separate thread which the servers are - # not aware of. It's possible that "time" will be set to None + """Continuously examine cached objects, expiring stale ones. + + This function is designed to be run in its own daemon thread, + referenced at ``self.expiration_thread``. + """ + # It's possible that "time" will be set to None # arbitrarily, so we check "while time" to avoid exceptions. # See tickets #99 and #180 for more information. while time: now = time.time() - for expiration_time, objects in self.expirations.items(): + # Must make a copy of expirations so it doesn't change size + # during iteration + for expiration_time, objects in copyitems(self.expirations): if expiration_time <= now: - for obj_size, obj_key in objects: + for obj_size, uri, sel_header_values in objects: try: - del self.cache[obj_key] + del self.store[uri][tuple(sel_header_values)] self.tot_expires += 1 self.cursize -= obj_size except KeyError: # the key may have been deleted elsewhere pass del self.expirations[expiration_time] - time.sleep(0.1) + time.sleep(self.expire_freq) def get(self): - """Return the object if in the cache, else None.""" + """Return the current variant if in the cache, else None.""" + request = cherrypy.serving.request self.tot_gets += 1 - cache_item = self.cache.get(self.key(), None) - if cache_item: - self.tot_hist += 1 - return cache_item - else: + + uri = cherrypy.url(qs=request.query_string) + uricache = self.store.get(uri) + if uricache is None: return None + + header_values = [request.headers.get(h, '') + for h in uricache.selecting_headers] + variant = uricache.wait(key=tuple(sorted(header_values)), + timeout=self.antistampede_timeout, + debug=self.debug) + if variant is not None: + self.tot_hist += 1 + return variant - def put(self, obj): - if len(self.cache) < self.maxobjects: - # Size check no longer includes header length - obj_size = len(obj[2]) - total_size = self.cursize + obj_size + def put(self, variant, size): + """Store the current variant in the cache.""" + request = cherrypy.serving.request + response = cherrypy.serving.response + + uri = cherrypy.url(qs=request.query_string) + uricache = self.store.get(uri) + if uricache is None: + uricache = AntiStampedeCache() + uricache.selecting_headers = [ + e.value for e in response.headers.elements('Vary')] + self.store[uri] = uricache + + if len(self.store) < self.maxobjects: + total_size = self.cursize + size # checks if there's space for the object - if (obj_size < self.maxobj_size and total_size < self.maxsize): - # add to the expirations list and cache - expiration_time = cherrypy.response.time + self.delay - obj_key = self.key() + if (size < self.maxobj_size and total_size < self.maxsize): + # add to the expirations list + expiration_time = response.time + self.delay bucket = self.expirations.setdefault(expiration_time, []) - bucket.append((obj_size, obj_key)) - self.cache[obj_key] = obj + bucket.append((size, uri, uricache.selecting_headers)) + + # add to the cache + header_values = [request.headers.get(h, '') + for h in uricache.selecting_headers] + uricache[tuple(sorted(header_values))] = variant self.tot_puts += 1 self.cursize = total_size def delete(self): - self.cache.pop(self.key(), None) + """Remove ALL cached variants of the current resource.""" + uri = cherrypy.url(qs=cherrypy.serving.request.query_string) + self.store.pop(uri, None) -def get(invalid_methods=("POST", "PUT", "DELETE"), **kwargs): +def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs): """Try to obtain cached output. If fresh enough, raise HTTPError(304). If POST, PUT, or DELETE: @@ -102,8 +276,8 @@ def get(invalid_methods=("POST", "PUT", "DELETE"), **kwargs): * sets request.cacheable = False * sets response.headers to the cached values * checks the cached Last-Modified response header against the - current If-(Un)Modified-Since request headers; raises 304 - if necessary. + current If-(Un)Modified-Since request headers; raises 304 + if necessary. * sets response.status and response.body to the cached values * returns True @@ -112,56 +286,83 @@ def get(invalid_methods=("POST", "PUT", "DELETE"), **kwargs): * sets request.cacheable = True * returns False """ - request = cherrypy.request + request = cherrypy.serving.request + response = cherrypy.serving.response + + if not hasattr(cherrypy, "_cache"): + # Make a process-wide Cache object. + cherrypy._cache = kwargs.pop("cache_class", MemoryCache)() + + # Take all remaining kwargs and set them on the Cache object. + for k, v in kwargs.items(): + setattr(cherrypy._cache, k, v) + cherrypy._cache.debug = debug # POST, PUT, DELETE should invalidate (delete) the cached copy. # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10. if request.method in invalid_methods: + if debug: + cherrypy.log('request.method %r in invalid_methods %r' % + (request.method, invalid_methods), 'TOOLS.CACHING') cherrypy._cache.delete() request.cached = False request.cacheable = False return False + if 'no-cache' in [e.value for e in request.headers.elements('Pragma')]: + request.cached = False + request.cacheable = True + return False + cache_data = cherrypy._cache.get() - request.cached = c = bool(cache_data) - request.cacheable = not c - if c: - response = cherrypy.response - s, h, b, create_time, original_req_headers = cache_data - - # Check 'Vary' selecting headers. If any headers mentioned in "Vary" - # differ between the cached and current request, bail out and - # let the rest of CP handle the request. This should properly - # mimic the behavior of isolated caches as RFC 2616 assumes: - # "If the selecting request header fields for the cached entry - # do not match the selecting request header fields of the new - # request, then the cache MUST NOT use a cached entry to satisfy - # the request unless it first relays the new request to the origin - # server in a conditional request and the server responds with - # 304 (Not Modified), including an entity tag or Content-Location - # that indicates the entity to be used. - # TODO: can we store multiple variants based on Vary'd headers? - for header_element in h.elements('Vary'): - key = header_element.value - if original_req_headers[key] != request.headers.get(key, 'missing'): + request.cached = bool(cache_data) + request.cacheable = not request.cached + if request.cached: + # Serve the cached copy. + max_age = cherrypy._cache.delay + for v in [e.value for e in request.headers.elements('Cache-Control')]: + atoms = v.split('=', 1) + directive = atoms.pop(0) + if directive == 'max-age': + if len(atoms) != 1 or not atoms[0].isdigit(): + raise cherrypy.HTTPError(400, "Invalid Cache-Control header") + max_age = int(atoms[0]) + break + elif directive == 'no-cache': + if debug: + cherrypy.log('Ignoring cache due to Cache-Control: no-cache', + 'TOOLS.CACHING') request.cached = False request.cacheable = True return False + if debug: + cherrypy.log('Reading response from cache', 'TOOLS.CACHING') + s, h, b, create_time = cache_data + age = int(response.time - create_time) + if (age > max_age): + if debug: + cherrypy.log('Ignoring cache due to age > %d' % max_age, + 'TOOLS.CACHING') + request.cached = False + request.cacheable = True + return False + # Copy the response headers. See http://www.cherrypy.org/ticket/721. - response.headers = rh = http.HeaderMap() + response.headers = rh = httputil.HeaderMap() for k in h: dict.__setitem__(rh, k, dict.__getitem__(h, k)) # Add the required Age header - response.headers["Age"] = str(int(response.time - create_time)) + response.headers["Age"] = str(age) try: # Note that validate_since depends on a Last-Modified header; # this was put into the cached copy, and should have been # resurrected just above (response.headers = cache_data[1]). cptools.validate_since() - except cherrypy.HTTPRedirect, x: + except cherrypy.HTTPRedirect: + x = sys.exc_info()[1] if x.status == 304: cherrypy._cache.tot_non_modified += 1 raise @@ -169,54 +370,68 @@ def get(invalid_methods=("POST", "PUT", "DELETE"), **kwargs): # serve it & get out from the request response.status = s response.body = b - return c + else: + if debug: + cherrypy.log('request is not cached', 'TOOLS.CACHING') + return request.cached def tee_output(): + """Tee response output to cache storage. Internal.""" + # Used by CachingTool by attaching to request.hooks + + request = cherrypy.serving.request + if 'no-store' in request.headers.values('Cache-Control'): + return + def tee(body): """Tee response.body into a list.""" + if ('no-cache' in response.headers.values('Pragma') or + 'no-store' in response.headers.values('Cache-Control')): + for chunk in body: + yield chunk + return + output = [] for chunk in body: output.append(chunk) yield chunk - # Might as well do this here; why cache if the body isn't consumed? - if response.headers.get('Pragma', None) != 'no-cache': - # save the cache data - body = ''.join(output) - vary = [he.value for he in - cherrypy.response.headers.elements('Vary')] - if vary: - sel_headers = dict([(k, v) for k, v - in cherrypy.request.headers.iteritems() - if k in vary]) - else: - sel_headers = {} - cherrypy._cache.put((response.status, response.headers or {}, - body, response.time, sel_headers)) + # save the cache data + body = ntob('').join(output) + cherrypy._cache.put((response.status, response.headers or {}, + body, response.time), len(body)) - response = cherrypy.response + response = cherrypy.serving.response response.body = tee(response.body) -def expires(secs=0, force=False): +def expires(secs=0, force=False, debug=False): """Tool for influencing cache mechanisms using the 'Expires' header. + + secs + Must be either an int or a datetime.timedelta, and indicates the + number of seconds between response.time and when the response should + expire. The 'Expires' header will be set to response.time + secs. + If secs is zero, the 'Expires' header is set one year in the past, and + the following "cache prevention" headers are also set: + + * Pragma: no-cache + * Cache-Control': no-cache, must-revalidate + + force + If False, the following headers are checked: + + * Etag + * Last-Modified + * Age + * Expires + + If any are already present, none of the above response headers are set. - 'secs' must be either an int or a datetime.timedelta, and indicates the - number of seconds between response.time and when the response should - expire. The 'Expires' header will be set to (response.time + secs). - - If 'secs' is zero, the 'Expires' header is set one year in the past, and - the following "cache prevention" headers are also set: - 'Pragma': 'no-cache' - 'Cache-Control': 'no-cache, must-revalidate' - - If 'force' is False (the default), the following headers are checked: - 'Etag', 'Last-Modified', 'Age', 'Expires'. If any are already present, - none of the above response headers are set. """ - response = cherrypy.response + response = cherrypy.serving.response headers = response.headers cacheable = False @@ -227,19 +442,24 @@ def expires(secs=0, force=False): cacheable = True break - if not cacheable: + if not cacheable and not force: + if debug: + cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES') + else: + if debug: + cherrypy.log('request is cacheable', 'TOOLS.EXPIRES') if isinstance(secs, datetime.timedelta): secs = (86400 * secs.days) + secs.seconds if secs == 0: - if force or "Pragma" not in headers: + if force or ("Pragma" not in headers): headers["Pragma"] = "no-cache" - if cherrypy.request.protocol >= (1, 1): + if cherrypy.serving.request.protocol >= (1, 1): if force or "Cache-Control" not in headers: headers["Cache-Control"] = "no-cache, must-revalidate" # Set an explicit Expires date in the past. - expiry = http.HTTPDate(1169942400.0) + expiry = httputil.HTTPDate(1169942400.0) else: - expiry = http.HTTPDate(response.time + secs) + expiry = httputil.HTTPDate(response.time + secs) if force or "Expires" not in headers: headers["Expires"] = expiry diff --git a/src/cherrypy/lib/covercp.py b/src/cherrypy/lib/covercp.py index ecc8cf47da..9b701b560d 100644 --- a/src/cherrypy/lib/covercp.py +++ b/src/cherrypy/lib/covercp.py @@ -1,55 +1,50 @@ """Code-coverage tools for CherryPy. To use this module, or the coverage tools in the test suite, -you need to download 'coverage.py', either Gareth Rees' original -implementation: -http://www.garethrees.org/2001/12/04/python-coverage/ +you need to download 'coverage.py', either Gareth Rees' `original +implementation `_ +or Ned Batchelder's `enhanced version: +`_ -or Ned Batchelder's enhanced version: -http://www.nedbatchelder.com/code/modules/coverage.html - -To turn on coverage tracing, use the following code: +To turn on coverage tracing, use the following code:: cherrypy.engine.subscribe('start', covercp.start) - cherrypy.engine.subscribe('start_thread', covercp.start) -Run your code, then use the covercp.serve() function to browse the +DO NOT subscribe anything on the 'start_thread' channel, as previously +recommended. Calling start once in the main thread should be sufficient +to start coverage on all threads. Calling start again in each thread +effectively clears any coverage data gathered up to that point. + +Run your code, then use the ``covercp.serve()`` function to browse the results in a web browser. If you run this module from the command line, -it will call serve() for you. +it will call ``serve()`` for you. """ import re import sys import cgi -import urllib +from cherrypy._cpcompat import quote_plus import os, os.path localFile = os.path.join(os.path.dirname(__file__), "coverage.cache") +the_coverage = None try: - import cStringIO as StringIO + from coverage import coverage + the_coverage = coverage(data_file=localFile) + def start(): + the_coverage.start() except ImportError: - import StringIO - -try: - from coverage import the_coverage as coverage - def start(threadid=None): - coverage.start() -except ImportError: - # Setting coverage to None will raise errors + # Setting the_coverage to None will raise errors # that need to be trapped downstream. - coverage = None + the_coverage = None import warnings warnings.warn("No code coverage will be performed; coverage.py could not be imported.") - def start(threadid=None): + def start(): pass start.priority = 20 -# Guess initial depth to hide FIXME this doesn't work for non-cherrypy stuff -import cherrypy -initial_base = os.path.dirname(cherrypy.__file__) - TEMPLATE_MENU = """ CherryPy Coverage Menu @@ -140,7 +135,7 @@ TEMPLATE_FRAMESET = """ -""" % initial_base.lower() +""" TEMPLATE_COVERAGE = """ @@ -187,10 +182,11 @@ def _percent(statements, missing): return int(round(100.0 * e / s)) return 0 -def _show_branch(root, base, path, pct=0, showpct=False, exclude=""): +def _show_branch(root, base, path, pct=0, showpct=False, exclude="", + coverage=the_coverage): # Show the directory name and any of our children - dirs = [k for k, v in root.iteritems() if v] + dirs = [k for k, v in root.items() if v] dirs.sort() for name in dirs: newpath = os.path.join(path, name) @@ -199,15 +195,15 @@ def _show_branch(root, base, path, pct=0, showpct=False, exclude=""): relpath = newpath[len(base):] yield "| " * relpath.count(os.sep) yield "%s\n" % \ - (newpath, urllib.quote_plus(exclude), name) + (newpath, quote_plus(exclude), name) - for chunk in _show_branch(root[name], base, newpath, pct, showpct, exclude): + for chunk in _show_branch(root[name], base, newpath, pct, showpct, exclude, coverage=coverage): yield chunk # Now list the files if path.lower().startswith(base): relpath = path[len(base):] - files = [k for k, v in root.iteritems() if not v] + files = [k for k, v in root.items() if not v] files.sort() for name in files: newpath = os.path.join(path, name) @@ -253,21 +249,28 @@ def _graft(path, tree): if node: d = d.setdefault(node, {}) -def get_tree(base, exclude): +def get_tree(base, exclude, coverage=the_coverage): """Return covered module names as a nested dict.""" tree = {} - coverage.get_ready() - runs = coverage.cexecuted.keys() - if runs: - for path in runs: - if not _skip_file(path, exclude) and not os.path.isdir(path): - _graft(path, tree) + runs = coverage.data.executed_files() + for path in runs: + if not _skip_file(path, exclude) and not os.path.isdir(path): + _graft(path, tree) return tree class CoverStats(object): + def __init__(self, coverage, root=None): + self.coverage = coverage + if root is None: + # Guess initial depth. Files outside this path will not be + # reachable from the web interface. + import cherrypy + root = os.path.dirname(cherrypy.__file__) + self.root = root + def index(self): - return TEMPLATE_FRAMESET + return TEMPLATE_FRAMESET % self.root.lower() index.exposed = True def menu(self, base="/", pct="50", showpct="", @@ -287,18 +290,18 @@ class CoverStats(object): for atom in atoms: path += atom + os.sep yield ("%s %s" - % (path, urllib.quote_plus(exclude), atom, os.sep)) + % (path, quote_plus(exclude), atom, os.sep)) yield "" yield "
" # Then display the tree - tree = get_tree(base, exclude) + tree = get_tree(base, exclude, self.coverage) if not tree: yield "

No modules covered.

" else: for chunk in _show_branch(tree, base, "/", pct, - showpct=='checked', exclude): + showpct=='checked', exclude, coverage=self.coverage): yield chunk yield "
" @@ -328,8 +331,7 @@ class CoverStats(object): yield template % (lineno, cgi.escape(line)) def report(self, name): - coverage.get_ready() - filename, statements, excluded, missing, _ = coverage.analysis2(name) + filename, statements, excluded, missing, _ = self.coverage.analysis2(name) pc = _percent(statements, missing) yield TEMPLATE_COVERAGE % dict(name=os.path.basename(name), fullpath=name, @@ -344,17 +346,19 @@ class CoverStats(object): report.exposed = True -def serve(path=localFile, port=8080): +def serve(path=localFile, port=8080, root=None): if coverage is None: raise ImportError("The coverage module could not be imported.") - coverage.cache_default = path + from coverage import coverage + cov = coverage(data_file = path) + cov.load() import cherrypy - cherrypy.config.update({'server.socket_port': port, + cherrypy.config.update({'server.socket_port': int(port), 'server.thread_pool': 10, 'environment': "production", }) - cherrypy.quickstart(CoverStats()) + cherrypy.quickstart(CoverStats(cov, root)) if __name__ == "__main__": serve(*tuple(sys.argv[1:])) diff --git a/src/cherrypy/lib/cpstats.py b/src/cherrypy/lib/cpstats.py new file mode 100644 index 0000000000..9be947f2b7 --- /dev/null +++ b/src/cherrypy/lib/cpstats.py @@ -0,0 +1,662 @@ +"""CPStats, a package for collecting and reporting on program statistics. + +Overview +======== + +Statistics about program operation are an invaluable monitoring and debugging +tool. Unfortunately, the gathering and reporting of these critical values is +usually ad-hoc. This package aims to add a centralized place for gathering +statistical performance data, a structure for recording that data which +provides for extrapolation of that data into more useful information, +and a method of serving that data to both human investigators and +monitoring software. Let's examine each of those in more detail. + +Data Gathering +-------------- + +Just as Python's `logging` module provides a common importable for gathering +and sending messages, performance statistics would benefit from a similar +common mechanism, and one that does *not* require each package which wishes +to collect stats to import a third-party module. Therefore, we choose to +re-use the `logging` module by adding a `statistics` object to it. + +That `logging.statistics` object is a nested dict. It is not a custom class, +because that would 1) require libraries and applications to import a third- +party module in order to participate, 2) inhibit innovation in extrapolation +approaches and in reporting tools, and 3) be slow. There are, however, some +specifications regarding the structure of the dict. + + { + +----"SQLAlchemy": { + | "Inserts": 4389745, + | "Inserts per Second": + | lambda s: s["Inserts"] / (time() - s["Start"]), + | C +---"Table Statistics": { + | o | "widgets": {-----------+ + N | l | "Rows": 1.3M, | Record + a | l | "Inserts": 400, | + m | e | },---------------------+ + e | c | "froobles": { + s | t | "Rows": 7845, + p | i | "Inserts": 0, + a | o | }, + c | n +---}, + e | "Slow Queries": + | [{"Query": "SELECT * FROM widgets;", + | "Processing Time": 47.840923343, + | }, + | ], + +----}, + } + +The `logging.statistics` dict has four levels. The topmost level is nothing +more than a set of names to introduce modularity, usually along the lines of +package names. If the SQLAlchemy project wanted to participate, for example, +it might populate the item `logging.statistics['SQLAlchemy']`, whose value +would be a second-layer dict we call a "namespace". Namespaces help multiple +packages to avoid collisions over key names, and make reports easier to read, +to boot. The maintainers of SQLAlchemy should feel free to use more than one +namespace if needed (such as 'SQLAlchemy ORM'). Note that there are no case +or other syntax constraints on the namespace names; they should be chosen +to be maximally readable by humans (neither too short nor too long). + +Each namespace, then, is a dict of named statistical values, such as +'Requests/sec' or 'Uptime'. You should choose names which will look +good on a report: spaces and capitalization are just fine. + +In addition to scalars, values in a namespace MAY be a (third-layer) +dict, or a list, called a "collection". For example, the CherryPy StatsTool +keeps track of what each request is doing (or has most recently done) +in a 'Requests' collection, where each key is a thread ID; each +value in the subdict MUST be a fourth dict (whew!) of statistical data about +each thread. We call each subdict in the collection a "record". Similarly, +the StatsTool also keeps a list of slow queries, where each record contains +data about each slow query, in order. + +Values in a namespace or record may also be functions, which brings us to: + +Extrapolation +------------- + +The collection of statistical data needs to be fast, as close to unnoticeable +as possible to the host program. That requires us to minimize I/O, for example, +but in Python it also means we need to minimize function calls. So when you +are designing your namespace and record values, try to insert the most basic +scalar values you already have on hand. + +When it comes time to report on the gathered data, however, we usually have +much more freedom in what we can calculate. Therefore, whenever reporting +tools (like the provided StatsPage CherryPy class) fetch the contents of +`logging.statistics` for reporting, they first call `extrapolate_statistics` +(passing the whole `statistics` dict as the only argument). This makes a +deep copy of the statistics dict so that the reporting tool can both iterate +over it and even change it without harming the original. But it also expands +any functions in the dict by calling them. For example, you might have a +'Current Time' entry in the namespace with the value "lambda scope: time.time()". +The "scope" parameter is the current namespace dict (or record, if we're +currently expanding one of those instead), allowing you access to existing +static entries. If you're truly evil, you can even modify more than one entry +at a time. + +However, don't try to calculate an entry and then use its value in further +extrapolations; the order in which the functions are called is not guaranteed. +This can lead to a certain amount of duplicated work (or a redesign of your +schema), but that's better than complicating the spec. + +After the whole thing has been extrapolated, it's time for: + +Reporting +--------- + +The StatsPage class grabs the `logging.statistics` dict, extrapolates it all, +and then transforms it to HTML for easy viewing. Each namespace gets its own +header and attribute table, plus an extra table for each collection. This is +NOT part of the statistics specification; other tools can format how they like. + +You can control which columns are output and how they are formatted by updating +StatsPage.formatting, which is a dict that mirrors the keys and nesting of +`logging.statistics`. The difference is that, instead of data values, it has +formatting values. Use None for a given key to indicate to the StatsPage that a +given column should not be output. Use a string with formatting (such as '%.3f') +to interpolate the value(s), or use a callable (such as lambda v: v.isoformat()) +for more advanced formatting. Any entry which is not mentioned in the formatting +dict is output unchanged. + +Monitoring +---------- + +Although the HTML output takes pains to assign unique id's to each with +statistical data, you're probably better off fetching /cpstats/data, which +outputs the whole (extrapolated) `logging.statistics` dict in JSON format. +That is probably easier to parse, and doesn't have any formatting controls, +so you get the "original" data in a consistently-serialized format. +Note: there's no treatment yet for datetime objects. Try time.time() instead +for now if you can. Nagios will probably thank you. + +Turning Collection Off +---------------------- + +It is recommended each namespace have an "Enabled" item which, if False, +stops collection (but not reporting) of statistical data. Applications +SHOULD provide controls to pause and resume collection by setting these +entries to False or True, if present. + + +Usage +===== + +To collect statistics on CherryPy applications: + + from cherrypy.lib import cpstats + appconfig['/']['tools.cpstats.on'] = True + +To collect statistics on your own code: + + import logging + # Initialize the repository + if not hasattr(logging, 'statistics'): logging.statistics = {} + # Initialize my namespace + mystats = logging.statistics.setdefault('My Stuff', {}) + # Initialize my namespace's scalars and collections + mystats.update({ + 'Enabled': True, + 'Start Time': time.time(), + 'Important Events': 0, + 'Events/Second': lambda s: ( + (s['Important Events'] / (time.time() - s['Start Time']))), + }) + ... + for event in events: + ... + # Collect stats + if mystats.get('Enabled', False): + mystats['Important Events'] += 1 + +To report statistics: + + root.cpstats = cpstats.StatsPage() + +To format statistics reports: + + See 'Reporting', above. + +""" + +# -------------------------------- Statistics -------------------------------- # + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + +def extrapolate_statistics(scope): + """Return an extrapolated copy of the given scope.""" + c = {} + for k, v in list(scope.items()): + if isinstance(v, dict): + v = extrapolate_statistics(v) + elif isinstance(v, (list, tuple)): + v = [extrapolate_statistics(record) for record in v] + elif hasattr(v, '__call__'): + v = v(scope) + c[k] = v + return c + + +# --------------------- CherryPy Applications Statistics --------------------- # + +import threading +import time + +import cherrypy + +appstats = logging.statistics.setdefault('CherryPy Applications', {}) +appstats.update({ + 'Enabled': True, + 'Bytes Read/Request': lambda s: (s['Total Requests'] and + (s['Total Bytes Read'] / float(s['Total Requests'])) or 0.0), + 'Bytes Read/Second': lambda s: s['Total Bytes Read'] / s['Uptime'](s), + 'Bytes Written/Request': lambda s: (s['Total Requests'] and + (s['Total Bytes Written'] / float(s['Total Requests'])) or 0.0), + 'Bytes Written/Second': lambda s: s['Total Bytes Written'] / s['Uptime'](s), + 'Current Time': lambda s: time.time(), + 'Current Requests': 0, + 'Requests/Second': lambda s: float(s['Total Requests']) / s['Uptime'](s), + 'Server Version': cherrypy.__version__, + 'Start Time': time.time(), + 'Total Bytes Read': 0, + 'Total Bytes Written': 0, + 'Total Requests': 0, + 'Total Time': 0, + 'Uptime': lambda s: time.time() - s['Start Time'], + 'Requests': {}, + }) + +proc_time = lambda s: time.time() - s['Start Time'] + + +class ByteCountWrapper(object): + """Wraps a file-like object, counting the number of bytes read.""" + + def __init__(self, rfile): + self.rfile = rfile + self.bytes_read = 0 + + def read(self, size=-1): + data = self.rfile.read(size) + self.bytes_read += len(data) + return data + + def readline(self, size=-1): + data = self.rfile.readline(size) + self.bytes_read += len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + return data + + +average_uriset_time = lambda s: s['Count'] and (s['Sum'] / s['Count']) or 0 + + +class StatsTool(cherrypy.Tool): + """Record various information about the current request.""" + + def __init__(self): + cherrypy.Tool.__init__(self, 'on_end_request', self.record_stop) + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + if appstats.get('Enabled', False): + cherrypy.Tool._setup(self) + self.record_start() + + def record_start(self): + """Record the beginning of a request.""" + request = cherrypy.serving.request + if not hasattr(request.rfile, 'bytes_read'): + request.rfile = ByteCountWrapper(request.rfile) + request.body.fp = request.rfile + + r = request.remote + + appstats['Current Requests'] += 1 + appstats['Total Requests'] += 1 + appstats['Requests'][threading._get_ident()] = { + 'Bytes Read': None, + 'Bytes Written': None, + # Use a lambda so the ip gets updated by tools.proxy later + 'Client': lambda s: '%s:%s' % (r.ip, r.port), + 'End Time': None, + 'Processing Time': proc_time, + 'Request-Line': request.request_line, + 'Response Status': None, + 'Start Time': time.time(), + } + + def record_stop(self, uriset=None, slow_queries=1.0, slow_queries_count=100, + debug=False, **kwargs): + """Record the end of a request.""" + resp = cherrypy.serving.response + w = appstats['Requests'][threading._get_ident()] + + r = cherrypy.request.rfile.bytes_read + w['Bytes Read'] = r + appstats['Total Bytes Read'] += r + + if resp.stream: + w['Bytes Written'] = 'chunked' + else: + cl = int(resp.headers.get('Content-Length', 0)) + w['Bytes Written'] = cl + appstats['Total Bytes Written'] += cl + + w['Response Status'] = getattr(resp, 'output_status', None) or resp.status + + w['End Time'] = time.time() + p = w['End Time'] - w['Start Time'] + w['Processing Time'] = p + appstats['Total Time'] += p + + appstats['Current Requests'] -= 1 + + if debug: + cherrypy.log('Stats recorded: %s' % repr(w), 'TOOLS.CPSTATS') + + if uriset: + rs = appstats.setdefault('URI Set Tracking', {}) + r = rs.setdefault(uriset, { + 'Min': None, 'Max': None, 'Count': 0, 'Sum': 0, + 'Avg': average_uriset_time}) + if r['Min'] is None or p < r['Min']: + r['Min'] = p + if r['Max'] is None or p > r['Max']: + r['Max'] = p + r['Count'] += 1 + r['Sum'] += p + + if slow_queries and p > slow_queries: + sq = appstats.setdefault('Slow Queries', []) + sq.append(w.copy()) + if len(sq) > slow_queries_count: + sq.pop(0) + + +import cherrypy +cherrypy.tools.cpstats = StatsTool() + + +# ---------------------- CherryPy Statistics Reporting ---------------------- # + +import os +thisdir = os.path.abspath(os.path.dirname(__file__)) + +try: + import json +except ImportError: + try: + import simplejson as json + except ImportError: + json = None + + +missing = object() + +locale_date = lambda v: time.strftime('%c', time.gmtime(v)) +iso_format = lambda v: time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(v)) + +def pause_resume(ns): + def _pause_resume(enabled): + pause_disabled = '' + resume_disabled = '' + if enabled: + resume_disabled = 'disabled="disabled" ' + else: + pause_disabled = 'disabled="disabled" ' + return """ +
+ + +
+
+ + +
+ """ % (ns, pause_disabled, ns, resume_disabled) + return _pause_resume + + +class StatsPage(object): + + formatting = { + 'CherryPy Applications': { + 'Enabled': pause_resume('CherryPy Applications'), + 'Bytes Read/Request': '%.3f', + 'Bytes Read/Second': '%.3f', + 'Bytes Written/Request': '%.3f', + 'Bytes Written/Second': '%.3f', + 'Current Time': iso_format, + 'Requests/Second': '%.3f', + 'Start Time': iso_format, + 'Total Time': '%.3f', + 'Uptime': '%.3f', + 'Slow Queries': { + 'End Time': None, + 'Processing Time': '%.3f', + 'Start Time': iso_format, + }, + 'URI Set Tracking': { + 'Avg': '%.3f', + 'Max': '%.3f', + 'Min': '%.3f', + 'Sum': '%.3f', + }, + 'Requests': { + 'Bytes Read': '%s', + 'Bytes Written': '%s', + 'End Time': None, + 'Processing Time': '%.3f', + 'Start Time': None, + }, + }, + 'CherryPy WSGIServer': { + 'Enabled': pause_resume('CherryPy WSGIServer'), + 'Connections/second': '%.3f', + 'Start time': iso_format, + }, + } + + + def index(self): + # Transform the raw data into pretty output for HTML + yield """ + + + Statistics + + + +""" + for title, scalars, collections in self.get_namespaces(): + yield """ +

%s

+ + + +""" % title + for i, (key, value) in enumerate(scalars): + colnum = i % 3 + if colnum == 0: yield """ + """ + yield """ + """ % vars() + if colnum == 2: yield """ + """ + + if colnum == 0: yield """ + + + """ + elif colnum == 1: yield """ + + """ + yield """ + +
%(key)s%(value)s
""" + + for subtitle, headers, subrows in collections: + yield """ +

%s

+ + + """ % subtitle + for key in headers: + yield """ + """ % key + yield """ + + + """ + for subrow in subrows: + yield """ + """ + for value in subrow: + yield """ + """ % value + yield """ + """ + yield """ + +
%s
%s
""" + yield """ + + +""" + index.exposed = True + + def get_namespaces(self): + """Yield (title, scalars, collections) for each namespace.""" + s = extrapolate_statistics(logging.statistics) + for title, ns in sorted(s.items()): + scalars = [] + collections = [] + ns_fmt = self.formatting.get(title, {}) + for k, v in sorted(ns.items()): + fmt = ns_fmt.get(k, {}) + if isinstance(v, dict): + headers, subrows = self.get_dict_collection(v, fmt) + collections.append((k, ['ID'] + headers, subrows)) + elif isinstance(v, (list, tuple)): + headers, subrows = self.get_list_collection(v, fmt) + collections.append((k, headers, subrows)) + else: + format = ns_fmt.get(k, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v = format(v) + elif format is not missing: + v = format % v + scalars.append((k, v)) + yield title, scalars, collections + + def get_dict_collection(self, v, formatting): + """Return ([headers], [rows]) for the given collection.""" + # E.g., the 'Requests' dict. + headers = [] + for record in v.itervalues(): + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for k2, record in sorted(v.items()): + subrow = [k2] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + def get_list_collection(self, v, formatting): + """Return ([headers], [subrows]) for the given collection.""" + # E.g., the 'Slow Queries' list. + headers = [] + for record in v: + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for record in v: + subrow = [] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + if json is not None: + def data(self): + s = extrapolate_statistics(logging.statistics) + cherrypy.response.headers['Content-Type'] = 'application/json' + return json.dumps(s, sort_keys=True, indent=4) + data.exposed = True + + def pause(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = False + raise cherrypy.HTTPRedirect('./') + pause.exposed = True + pause.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} + + def resume(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = True + raise cherrypy.HTTPRedirect('./') + resume.exposed = True + resume.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} + diff --git a/src/cherrypy/lib/cptools.py b/src/cherrypy/lib/cptools.py index b54019cb81..b426a3e784 100644 --- a/src/cherrypy/lib/cptools.py +++ b/src/cherrypy/lib/cptools.py @@ -1,22 +1,16 @@ """Functions for builtin CherryPy tools.""" import logging - -try: - # Python 2.5+ - from hashlib import md5 -except ImportError: - from md5 import new as md5 - import re import cherrypy -from cherrypy.lib import http as _http +from cherrypy._cpcompat import basestring, ntob, md5, set +from cherrypy.lib import httputil as _httputil # Conditional HTTP request support # -def validate_etags(autotags=False): +def validate_etags(autotags=False, debug=False): """Validate the current ETag against If-Match, If-None-Match headers. If autotags is True, an ETag response-header value will be provided @@ -30,42 +24,62 @@ def validate_etags(autotags=False): use for entity tags in a possibly destructive fashion. Likewise, if you raise 304 Not Modified, the response body will be empty, the ETag hash will be incorrect, and your application will break. - See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24 + See :rfc:`2616` Section 14.24. """ - response = cherrypy.response + response = cherrypy.serving.response # Guard against being run twice. if hasattr(response, "ETag"): return - status, reason, msg = _http.valid_status(response.status) + status, reason, msg = _httputil.valid_status(response.status) etag = response.headers.get('ETag') # Automatic ETag generation. See warning in docstring. - if (not etag) and autotags: - if status == 200: - etag = response.collapse_body() - etag = '"%s"' % md5(etag).hexdigest() - response.headers['ETag'] = etag + if etag: + if debug: + cherrypy.log('ETag already set: %s' % etag, 'TOOLS.ETAGS') + elif not autotags: + if debug: + cherrypy.log('Autotags off', 'TOOLS.ETAGS') + elif status != 200: + if debug: + cherrypy.log('Status not 200', 'TOOLS.ETAGS') + else: + etag = response.collapse_body() + etag = '"%s"' % md5(etag).hexdigest() + if debug: + cherrypy.log('Setting ETag: %s' % etag, 'TOOLS.ETAGS') + response.headers['ETag'] = etag response.ETag = etag # "If the request would, without the If-Match header field, result in # anything other than a 2xx or 412 status, then the If-Match header # MUST be ignored." + if debug: + cherrypy.log('Status: %s' % status, 'TOOLS.ETAGS') if status >= 200 and status <= 299: - request = cherrypy.request + request = cherrypy.serving.request conditions = request.headers.elements('If-Match') or [] conditions = [str(x) for x in conditions] + if debug: + cherrypy.log('If-Match conditions: %s' % repr(conditions), + 'TOOLS.ETAGS') if conditions and not (conditions == ["*"] or etag in conditions): raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did " "not match %r" % (etag, conditions)) conditions = request.headers.elements('If-None-Match') or [] conditions = [str(x) for x in conditions] + if debug: + cherrypy.log('If-None-Match conditions: %s' % repr(conditions), + 'TOOLS.ETAGS') if conditions == ["*"] or etag in conditions: + if debug: + cherrypy.log('request.method: %s' % request.method, 'TOOLS.ETAGS') if request.method in ("GET", "HEAD"): raise cherrypy.HTTPRedirect([], 304) else: @@ -78,12 +92,12 @@ def validate_since(): If no code has set the Last-Modified response header, then no validation will be performed. """ - response = cherrypy.response + response = cherrypy.serving.response lastmod = response.headers.get('Last-Modified') if lastmod: - status, reason, msg = _http.valid_status(response.status) + status, reason, msg = _httputil.valid_status(response.status) - request = cherrypy.request + request = cherrypy.serving.request since = request.headers.get('If-Unmodified-Since') if since and since != lastmod: @@ -101,16 +115,51 @@ def validate_since(): # Tool code # +def allow(methods=None, debug=False): + """Raise 405 if request.method not in methods (default ['GET', 'HEAD']). + + The given methods are case-insensitive, and may be in any order. + If only one method is allowed, you may supply a single string; + if more than one, supply a list of strings. + + Regardless of whether the current method is allowed or not, this + also emits an 'Allow' response header, containing the given methods. + """ + if not isinstance(methods, (tuple, list)): + methods = [methods] + methods = [m.upper() for m in methods if m] + if not methods: + methods = ['GET', 'HEAD'] + elif 'GET' in methods and 'HEAD' not in methods: + methods.append('HEAD') + + cherrypy.response.headers['Allow'] = ', '.join(methods) + if cherrypy.request.method not in methods: + if debug: + cherrypy.log('request.method %r not in methods %r' % + (cherrypy.request.method, methods), 'TOOLS.ALLOW') + raise cherrypy.HTTPError(405) + else: + if debug: + cherrypy.log('request.method %r in methods %r' % + (cherrypy.request.method, methods), 'TOOLS.ALLOW') + + def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', - scheme='X-Forwarded-Proto'): + scheme='X-Forwarded-Proto', debug=False): """Change the base URL (scheme://host[:port][/path]). For running a CP server behind Apache, lighttpd, or other HTTP server. + For Apache and lighttpd, you should leave the 'local' argument at the + default value of 'X-Forwarded-Host'. For Squid, you probably want to set + tools.proxy.local = 'Origin'. + If you want the new request.base to include path info (not just the host), you must explicitly set base to the full base path, and ALSO set 'local' to '', so that the X-Forwarded-Host request header (which never includes - path info) does not override it. + path info) does not override it. Regardless, the value for 'base' MUST + NOT end in a slash. cherrypy.request.remote.ip (the IP address of the client) will be rewritten if the header specified by the 'remote' arg is valid. @@ -118,10 +167,12 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', want to rewrite remote.ip, set the 'remote' arg to an empty string. """ - request = cherrypy.request + request = cherrypy.serving.request if scheme: s = request.headers.get(scheme, None) + if debug: + cherrypy.log('Testing scheme %r:%r' % (scheme, s), 'TOOLS.PROXY') if s == 'on' and 'ssl' in scheme.lower(): # This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header scheme = 'https' @@ -132,9 +183,13 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', scheme = request.base[:request.base.find("://")] if local: - base = request.headers.get(local, base) + lbase = request.headers.get(local, None) + if debug: + cherrypy.log('Testing local %r:%r' % (local, lbase), 'TOOLS.PROXY') + if lbase is not None: + base = lbase.split(',')[0] if not base: - port = cherrypy.request.local.port + port = request.local.port if port == 80: base = '127.0.0.1' else: @@ -148,6 +203,8 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', if remote: xff = request.headers.get(remote) + if debug: + cherrypy.log('Testing remote %r:%r' % (remote, xff), 'TOOLS.PROXY') if xff: if remote == 'X-Forwarded-For': # See http://bob.pythonmac.org/archives/2005/09/23/apache-x-forwarded-for-caveat/ @@ -155,42 +212,64 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', request.remote.ip = xff -def ignore_headers(headers=('Range',)): +def ignore_headers(headers=('Range',), debug=False): """Delete request headers whose field names are included in 'headers'. This is a useful tool for working behind certain HTTP servers; for example, Apache duplicates the work that CP does for 'Range' headers, and will doubly-truncate the response. """ - request = cherrypy.request + request = cherrypy.serving.request for name in headers: if name in request.headers: + if debug: + cherrypy.log('Ignoring request header %r' % name, + 'TOOLS.IGNORE_HEADERS') del request.headers[name] -def response_headers(headers=None): +def response_headers(headers=None, debug=False): """Set headers on the response.""" + if debug: + cherrypy.log('Setting response headers: %s' % repr(headers), + 'TOOLS.RESPONSE_HEADERS') for name, value in (headers or []): - cherrypy.response.headers[name] = value + cherrypy.serving.response.headers[name] = value response_headers.failsafe = True def referer(pattern, accept=True, accept_missing=False, error=403, - message='Forbidden Referer header.'): + message='Forbidden Referer header.', debug=False): """Raise HTTPError if Referer header does/does not match the given pattern. - pattern: a regular expression pattern to test against the Referer. - accept: if True, the Referer must match the pattern; if False, + pattern + A regular expression pattern to test against the Referer. + + accept + If True, the Referer must match the pattern; if False, the Referer must NOT match the pattern. - accept_missing: if True, permit requests with no Referer header. - error: the HTTP error code to return to the client on failure. - message: a string to include in the response body on failure. + + accept_missing + If True, permit requests with no Referer header. + + error + The HTTP error code to return to the client on failure. + + message + A string to include in the response body on failure. + """ try: - match = bool(re.match(pattern, cherrypy.request.headers['Referer'])) + ref = cherrypy.serving.request.headers['Referer'] + match = bool(re.match(pattern, ref)) + if debug: + cherrypy.log('Referer %r matches %r' % (ref, pattern), + 'TOOLS.REFERER') if accept == match: return except KeyError: + if debug: + cherrypy.log('No Referer header', 'TOOLS.REFERER') if accept_missing: return @@ -201,6 +280,7 @@ class SessionAuth(object): """Assert that the user is logged in.""" session_key = "username" + debug = False def check_username_and_password(self, username, password): pass @@ -219,7 +299,7 @@ class SessionAuth(object): pass def login_screen(self, from_page='..', username='', error_msg='', **kwargs): - return """ + return ntob(""" Message: %(error_msg)s
Login:
@@ -228,20 +308,22 @@ Message: %(error_msg)s
""" % {'from_page': from_page, 'username': username, - 'error_msg': error_msg} + 'error_msg': error_msg}, "utf-8") def do_login(self, username, password, from_page='..', **kwargs): """Login. May raise redirect, or return True if request handled.""" + response = cherrypy.serving.response error_msg = self.check_username_and_password(username, password) if error_msg: body = self.login_screen(from_page, username, error_msg) - cherrypy.response.body = body - if cherrypy.response.headers.has_key("Content-Length"): + response.body = body + if "Content-Length" in response.headers: # Delete Content-Length header so finalize() recalcs it. - del cherrypy.response.headers["Content-Length"] + del response.headers["Content-Length"] return True else: - cherrypy.session[self.session_key] = cherrypy.request.login = username + cherrypy.serving.request.login = username + cherrypy.session[self.session_key] = username self.on_login(username) raise cherrypy.HTTPRedirect(from_page or "/") @@ -251,43 +333,70 @@ Message: %(error_msg)s username = sess.get(self.session_key) sess[self.session_key] = None if username: - cherrypy.request.login = None + cherrypy.serving.request.login = None self.on_logout(username) raise cherrypy.HTTPRedirect(from_page) def do_check(self): """Assert username. May raise redirect, or return True if request handled.""" sess = cherrypy.session - request = cherrypy.request + request = cherrypy.serving.request + response = cherrypy.serving.response username = sess.get(self.session_key) if not username: sess[self.session_key] = username = self.anonymous() + if self.debug: + cherrypy.log('No session[username], trying anonymous', 'TOOLS.SESSAUTH') if not username: - cherrypy.response.body = self.login_screen(cherrypy.url(qs=request.query_string)) - if cherrypy.response.headers.has_key("Content-Length"): + url = cherrypy.url(qs=request.query_string) + if self.debug: + cherrypy.log('No username, routing to login_screen with ' + 'from_page %r' % url, 'TOOLS.SESSAUTH') + response.body = self.login_screen(url) + if "Content-Length" in response.headers: # Delete Content-Length header so finalize() recalcs it. - del cherrypy.response.headers["Content-Length"] + del response.headers["Content-Length"] return True - cherrypy.request.login = username + if self.debug: + cherrypy.log('Setting request.login to %r' % username, 'TOOLS.SESSAUTH') + request.login = username self.on_check(username) def run(self): - request = cherrypy.request + request = cherrypy.serving.request + response = cherrypy.serving.response + path = request.path_info if path.endswith('login_screen'): + if self.debug: + cherrypy.log('routing %r to login_screen' % path, 'TOOLS.SESSAUTH') return self.login_screen(**request.params) elif path.endswith('do_login'): + if request.method != 'POST': + response.headers['Allow'] = "POST" + if self.debug: + cherrypy.log('do_login requires POST', 'TOOLS.SESSAUTH') + raise cherrypy.HTTPError(405) + if self.debug: + cherrypy.log('routing %r to do_login' % path, 'TOOLS.SESSAUTH') return self.do_login(**request.params) elif path.endswith('do_logout'): + if request.method != 'POST': + response.headers['Allow'] = "POST" + raise cherrypy.HTTPError(405) + if self.debug: + cherrypy.log('routing %r to do_logout' % path, 'TOOLS.SESSAUTH') return self.do_logout(**request.params) else: + if self.debug: + cherrypy.log('No special path, running do_check', 'TOOLS.SESSAUTH') return self.do_check() def session_auth(**kwargs): sa = SessionAuth() - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(sa, k, v) return sa.run() session_auth.__doc__ = """Session authentication hook. @@ -299,59 +408,69 @@ to this function: for k in dir(SessionAuth) if not k.startswith("__")]) -def log_traceback(severity=logging.DEBUG): +def log_traceback(severity=logging.ERROR, debug=False): """Write the last error's traceback to the cherrypy error log.""" cherrypy.log("", "HTTP", severity=severity, traceback=True) -def log_request_headers(): +def log_request_headers(debug=False): """Write request headers to the cherrypy error log.""" - h = [" %s: %s" % (k, v) for k, v in cherrypy.request.header_list] + h = [" %s: %s" % (k, v) for k, v in cherrypy.serving.request.header_list] cherrypy.log('\nRequest Headers:\n' + '\n'.join(h), "HTTP") -def log_hooks(): +def log_hooks(debug=False): """Write request.hooks to the cherrypy error log.""" + request = cherrypy.serving.request + msg = [] # Sort by the standard points if possible. from cherrypy import _cprequest points = _cprequest.hookpoints - for k in cherrypy.request.hooks.keys(): + for k in request.hooks.keys(): if k not in points: points.append(k) for k in points: msg.append(" %s:" % k) - v = cherrypy.request.hooks.get(k, []) + v = request.hooks.get(k, []) v.sort() for h in v: msg.append(" %r" % h) cherrypy.log('\nRequest Hooks for ' + cherrypy.url() + ':\n' + '\n'.join(msg), "HTTP") -def redirect(url='', internal=True): +def redirect(url='', internal=True, debug=False): """Raise InternalRedirect or HTTPRedirect to the given url.""" + if debug: + cherrypy.log('Redirecting %sto: %s' % + ({True: 'internal ', False: ''}[internal], url), + 'TOOLS.REDIRECT') if internal: raise cherrypy.InternalRedirect(url) else: raise cherrypy.HTTPRedirect(url) -def trailing_slash(missing=True, extra=False): +def trailing_slash(missing=True, extra=False, status=None, debug=False): """Redirect if path_info has (missing|extra) trailing slash.""" - request = cherrypy.request + request = cherrypy.serving.request pi = request.path_info + if debug: + cherrypy.log('is_index: %r, missing: %r, extra: %r, path_info: %r' % + (request.is_index, missing, extra, pi), + 'TOOLS.TRAILING_SLASH') if request.is_index is True: if missing: if not pi.endswith('/'): new_url = cherrypy.url(pi + '/', request.query_string) - raise cherrypy.HTTPRedirect(new_url) + raise cherrypy.HTTPRedirect(new_url, status=status or 301) elif request.is_index is False: if extra: # If pi == '/', don't redirect to ''! if pi.endswith('/') and pi != '/': new_url = cherrypy.url(pi[:-1], request.query_string) - raise cherrypy.HTTPRedirect(new_url) + raise cherrypy.HTTPRedirect(new_url, status=status or 301) -def flatten(): +def flatten(debug=False): """Wrap response.body in a generator that recursively iterates over body. This allows cherrypy.response.body to consist of 'nested generators'; @@ -359,23 +478,28 @@ def flatten(): """ import types def flattener(input): + numchunks = 0 for x in input: if not isinstance(x, types.GeneratorType): + numchunks += 1 yield x else: for y in flattener(x): - yield y - response = cherrypy.response + numchunks += 1 + yield y + if debug: + cherrypy.log('Flattened %d chunks' % numchunks, 'TOOLS.FLATTEN') + response = cherrypy.serving.response response.body = flattener(response.body) -def accept(media=None): +def accept(media=None, debug=False): """Return the client's preferred media-type (from the given Content-Types). If 'media' is None (the default), no test will be performed. If 'media' is provided, it should be the Content-Type value (as a string) - or values (as a list or tuple of strings) which the current request + or values (as a list or tuple of strings) which the current resource can emit. The client's acceptable media ranges (as declared in the Accept request header) will be matched in order to these Content-Type values; the first such string is returned. That is, the return value @@ -397,12 +521,15 @@ def accept(media=None): return if isinstance(media, basestring): media = [media] + request = cherrypy.serving.request # Parse the Accept request header, and try to match one # of the requested media-ranges (in order of preference). - ranges = cherrypy.request.headers.elements('Accept') + ranges = request.headers.elements('Accept') if not ranges: # Any media type is acceptable. + if debug: + cherrypy.log('No Accept header elements', 'TOOLS.ACCEPT') return media[0] else: # Note that 'ranges' is sorted in order of preference @@ -410,20 +537,28 @@ def accept(media=None): if element.qvalue > 0: if element.value == "*/*": # Matches any type or subtype + if debug: + cherrypy.log('Match due to */*', 'TOOLS.ACCEPT') return media[0] elif element.value.endswith("/*"): # Matches any subtype mtype = element.value[:-1] # Keep the slash for m in media: if m.startswith(mtype): + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') return m else: # Matches exact value if element.value in media: + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') return element.value # No suitable media-range found. - ah = cherrypy.request.headers.get('Accept') + ah = request.headers.get('Accept') if ah is None: msg = "Your client did not send an Accept header." else: @@ -432,3 +567,51 @@ def accept(media=None): ", ".join(media)) raise cherrypy.HTTPError(406, msg) + +class MonitoredHeaderMap(_httputil.HeaderMap): + + def __init__(self): + self.accessed_headers = set() + + def __getitem__(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.__getitem__(self, key) + + def __contains__(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.__contains__(self, key) + + def get(self, key, default=None): + self.accessed_headers.add(key) + return _httputil.HeaderMap.get(self, key, default=default) + + if hasattr({}, 'has_key'): + # Python 2 + def has_key(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.has_key(self, key) + + +def autovary(ignore=None, debug=False): + """Auto-populate the Vary response header based on request.header access.""" + request = cherrypy.serving.request + + req_h = request.headers + request.headers = MonitoredHeaderMap() + request.headers.update(req_h) + if ignore is None: + ignore = set(['Content-Disposition', 'Content-Length', 'Content-Type']) + + def set_response_header(): + resp_h = cherrypy.serving.response.headers + v = set([e.value for e in resp_h.elements('Vary')]) + if debug: + cherrypy.log('Accessed headers: %s' % request.headers.accessed_headers, + 'TOOLS.AUTOVARY') + v = v.union(request.headers.accessed_headers) + v = v.difference(ignore) + v = list(v) + v.sort() + resp_h['Vary'] = ', '.join(v) + request.hooks.attach('before_finalize', set_response_header, 95) + diff --git a/src/cherrypy/lib/encoding.py b/src/cherrypy/lib/encoding.py index 94dc908e08..6459746509 100644 --- a/src/cherrypy/lib/encoding.py +++ b/src/cherrypy/lib/encoding.py @@ -2,173 +2,237 @@ import struct import time import cherrypy +from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr +from cherrypy.lib import file_generator +from cherrypy.lib import set_vary_header def decode(encoding=None, default_encoding='utf-8'): - """Decode cherrypy.request.params from str to unicode objects.""" - if not encoding: - ct = cherrypy.request.headers.elements("Content-Type") + """Replace or extend the list of charsets used to decode a request entity. + + Either argument may be a single string or a list of strings. + + encoding + If not None, restricts the set of charsets attempted while decoding + a request entity to the given set (even if a different charset is given in + the Content-Type request header). + + default_encoding + Only in effect if the 'encoding' argument is not given. + If given, the set of charsets attempted while decoding a request entity is + *extended* with the given value(s). + + """ + body = cherrypy.request.body + if encoding is not None: + if not isinstance(encoding, list): + encoding = [encoding] + body.attempt_charsets = encoding + elif default_encoding: + if not isinstance(default_encoding, list): + default_encoding = [default_encoding] + body.attempt_charsets = body.attempt_charsets + default_encoding + + +class ResponseEncoder: + + default_encoding = 'utf-8' + failmsg = "Response body could not be encoded with %r." + encoding = None + errors = 'strict' + text_only = True + add_charset = True + debug = False + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + self.attempted_charsets = set() + request = cherrypy.serving.request + if request.handler is not None: + # Replace request.handler with self + if self.debug: + cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE') + self.oldhandler = request.handler + request.handler = self + + def encode_stream(self, encoding): + """Encode a streaming response body. + + Use a generator wrapper, and just pray it works as the stream is + being written out. + """ + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + + def encoder(body): + for chunk in body: + if isinstance(chunk, unicodestr): + chunk = chunk.encode(encoding, self.errors) + yield chunk + self.body = encoder(self.body) + return True + + def encode_string(self, encoding): + """Encode a buffered response body.""" + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + + try: + body = [] + for chunk in self.body: + if isinstance(chunk, unicodestr): + chunk = chunk.encode(encoding, self.errors) + body.append(chunk) + self.body = body + except (LookupError, UnicodeError): + return False + else: + return True + + def find_acceptable_charset(self): + request = cherrypy.serving.request + response = cherrypy.serving.response + + if self.debug: + cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE') + if response.stream: + encoder = self.encode_stream + else: + encoder = self.encode_string + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + # Encoded strings may be of different lengths from their + # unicode equivalents, and even from each other. For example: + # >>> t = u"\u7007\u3040" + # >>> len(t) + # 2 + # >>> len(t.encode("UTF-8")) + # 6 + # >>> len(t.encode("utf7")) + # 8 + del response.headers["Content-Length"] + + # Parse the Accept-Charset request header, and try to provide one + # of the requested charsets (in order of user preference). + encs = request.headers.elements('Accept-Charset') + charsets = [enc.value.lower() for enc in encs] + if self.debug: + cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE') + + if self.encoding is not None: + # If specified, force this encoding to be used, or fail. + encoding = self.encoding.lower() + if self.debug: + cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE') + if (not charsets) or "*" in charsets or encoding in charsets: + if self.debug: + cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + else: + if not encs: + if self.debug: + cherrypy.log('Attempting default encoding %r' % + self.default_encoding, 'TOOLS.ENCODE') + # Any character-set is acceptable. + if encoder(self.default_encoding): + return self.default_encoding + else: + raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding) + else: + for element in encs: + if element.qvalue > 0: + if element.value == "*": + # Matches any charset. Try our default. + if self.debug: + cherrypy.log('Attempting default encoding due ' + 'to %r' % element, 'TOOLS.ENCODE') + if encoder(self.default_encoding): + return self.default_encoding + else: + encoding = element.value + if self.debug: + cherrypy.log('Attempting encoding %s (qvalue >' + '0)' % element, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + + if "*" not in charsets: + # If no "*" is present in an Accept-Charset field, then all + # character sets not explicitly mentioned get a quality + # value of 0, except for ISO-8859-1, which gets a quality + # value of 1 if not explicitly mentioned. + iso = 'iso-8859-1' + if iso not in charsets: + if self.debug: + cherrypy.log('Attempting ISO-8859-1 encoding', + 'TOOLS.ENCODE') + if encoder(iso): + return iso + + # No suitable encoding found. + ac = request.headers.get('Accept-Charset') + if ac is None: + msg = "Your client did not send an Accept-Charset header." + else: + msg = "Your client sent this Accept-Charset header: %s." % ac + msg += " We tried these charsets: %s." % ", ".join(self.attempted_charsets) + raise cherrypy.HTTPError(406, msg) + + def __call__(self, *args, **kwargs): + response = cherrypy.serving.response + self.body = self.oldhandler(*args, **kwargs) + + if isinstance(self.body, basestring): + # strings get wrapped in a list because iterating over a single + # item list is much faster than iterating over every character + # in a long string. + if self.body: + self.body = [self.body] + else: + # [''] doesn't evaluate to False, so replace it with []. + self.body = [] + elif hasattr(self.body, 'read'): + self.body = file_generator(self.body) + elif self.body is None: + self.body = [] + + ct = response.headers.elements("Content-Type") + if self.debug: + cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE') if ct: ct = ct[0] - encoding = ct.params.get("charset", None) - if (not encoding) and ct.value.lower().startswith("text/"): - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 - # When no explicit charset parameter is provided by the - # sender, media subtypes of the "text" type are defined - # to have a default charset value of "ISO-8859-1" when - # received via HTTP. - encoding = "ISO-8859-1" - - if not encoding: - encoding = default_encoding - - try: - decode_params(encoding) - except UnicodeDecodeError: - # IE and Firefox don't supply a charset when submitting form - # params with a CT of application/x-www-form-urlencoded. - # So after all our guessing, it could *still* be wrong. - # Start over with ISO-8859-1, since that seems to be preferred. - decode_params("ISO-8859-1") - -def decode_params(encoding): - decoded_params = {} - for key, value in cherrypy.request.params.items(): - if not hasattr(value, 'file'): - # Skip the value if it is an uploaded file - if isinstance(value, list): - # value is a list: decode each element - value = [v.decode(encoding) for v in value] - elif isinstance(value, str): - # value is a regular string: decode it - value = value.decode(encoding) - decoded_params[key] = value - - # Decode all or nothing, so we can try again on error. - cherrypy.request.params = decoded_params - - -# Encoding - -def encode(encoding=None, errors='strict', text_only=True, add_charset=True): - # Guard against running twice - if getattr(cherrypy.request, "_encoding_attempted", False): - return - cherrypy.request._encoding_attempted = True - - ct = cherrypy.response.headers.elements("Content-Type") - if ct: - ct = ct[0] - if (not text_only) or ct.value.lower().startswith("text/"): - # Set "charset=..." param on response Content-Type header - ct.params['charset'] = find_acceptable_charset(encoding, errors=errors) - if add_charset: - cherrypy.response.headers["Content-Type"] = str(ct) - -def encode_stream(encoding, errors='strict'): - """Encode a streaming response body. - - Use a generator wrapper, and just pray it works as the stream is - being written out. - """ - def encoder(body): - for chunk in body: - if isinstance(chunk, unicode): - chunk = chunk.encode(encoding, errors) - yield chunk - cherrypy.response.body = encoder(cherrypy.response.body) - return True - -def encode_string(encoding, errors='strict'): - """Encode a buffered response body.""" - try: - body = [] - for chunk in cherrypy.response.body: - if isinstance(chunk, unicode): - chunk = chunk.encode(encoding, errors) - body.append(chunk) - cherrypy.response.body = body - except (LookupError, UnicodeError): - return False - else: - return True - -def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'): - response = cherrypy.response - - if cherrypy.response.stream: - encoder = encode_stream - else: - response.collapse_body() - encoder = encode_string - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - # Encoded strings may be of different lengths from their - # unicode equivalents, and even from each other. For example: - # >>> t = u"\u7007\u3040" - # >>> len(t) - # 2 - # >>> len(t.encode("UTF-8")) - # 6 - # >>> len(t.encode("utf7")) - # 8 - del response.headers["Content-Length"] - - # Parse the Accept-Charset request header, and try to provide one - # of the requested charsets (in order of user preference). - encs = cherrypy.request.headers.elements('Accept-Charset') - charsets = [enc.value.lower() for enc in encs] - attempted_charsets = [] - - if encoding is not None: - # If specified, force this encoding to be used, or fail. - encoding = encoding.lower() - if (not charsets) or "*" in charsets or encoding in charsets: - if encoder(encoding, errors): - return encoding - else: - if not encs: - # Any character-set is acceptable. - if encoder(default_encoding, errors): - return default_encoding + if self.text_only: + if ct.value.lower().startswith("text/"): + if self.debug: + cherrypy.log('Content-Type %s starts with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = True + else: + if self.debug: + cherrypy.log('Not finding because Content-Type %s does ' + 'not start with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = False else: - raise cherrypy.HTTPError(500, failmsg % default_encoding) - else: - if "*" not in charsets: - # If no "*" is present in an Accept-Charset field, then all - # character sets not explicitly mentioned get a quality - # value of 0, except for ISO-8859-1, which gets a quality - # value of 1 if not explicitly mentioned. - iso = 'iso-8859-1' - if iso not in charsets: - attempted_charsets.append(iso) - if encoder(iso, errors): - return iso + if self.debug: + cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE') + do_find = True - for element in encs: - if element.qvalue > 0: - if element.value == "*": - # Matches any charset. Try our default. - if default_encoding not in attempted_charsets: - attempted_charsets.append(default_encoding) - if encoder(default_encoding, errors): - return default_encoding - else: - encoding = element.value - if encoding not in attempted_charsets: - attempted_charsets.append(encoding) - if encoder(encoding, errors): - return encoding - - # No suitable encoding found. - ac = cherrypy.request.headers.get('Accept-Charset') - if ac is None: - msg = "Your client did not send an Accept-Charset header." - else: - msg = "Your client sent this Accept-Charset header: %s." % ac - msg += " We tried these charsets: %s." % ", ".join(attempted_charsets) - raise cherrypy.HTTPError(406, msg) - + if do_find: + # Set "charset=..." param on response Content-Type header + ct.params['charset'] = self.find_acceptable_charset() + if self.add_charset: + if self.debug: + cherrypy.log('Setting Content-Type %s' % ct, + 'TOOLS.ENCODE') + response.headers["Content-Type"] = str(ct) + + return self.body # GZIP @@ -176,14 +240,16 @@ def compress(body, compress_level): """Compress 'body' at the given compress_level.""" import zlib - yield '\037\213' # magic header - yield '\010' # compression method - yield '\0' - yield struct.pack(" 0 is present * The 'identity' value is given with a qvalue > 0. + """ - response = cherrypy.response + request = cherrypy.serving.request + response = cherrypy.serving.response + + set_vary_header(response, "Accept-Encoding") + if not response.body: # Response body is empty (might be a 304 for instance) + if debug: + cherrypy.log('No response body', context='TOOLS.GZIP') return # If returning cached content (which should already have been gzipped), # don't re-zip. - if getattr(cherrypy.request, "cached", False): + if getattr(request, "cached", False): + if debug: + cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP') return - acceptable = cherrypy.request.headers.elements('Accept-Encoding') + acceptable = request.headers.elements('Accept-Encoding') if not acceptable: # If no Accept-Encoding field is present in a request, # the server MAY assume that the client will accept any @@ -239,27 +322,67 @@ def gzip(compress_level=9, mime_types=['text/html', 'text/plain']): # the "identity" content-coding, unless it has additional # information that a different content-coding is meaningful # to the client. + if debug: + cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP') return ct = response.headers.get('Content-Type', '').split(';')[0] for coding in acceptable: if coding.value == 'identity' and coding.qvalue != 0: + if debug: + cherrypy.log('Non-zero identity qvalue: %s' % coding, + context='TOOLS.GZIP') return if coding.value in ('gzip', 'x-gzip'): if coding.qvalue == 0: + if debug: + cherrypy.log('Zero gzip qvalue: %s' % coding, + context='TOOLS.GZIP') return - if ct in mime_types: - # Return a generator that compresses the page - varies = response.headers.get("Vary", "") - varies = [x.strip() for x in varies.split(",") if x.strip()] - if "Accept-Encoding" not in varies: - varies.append("Accept-Encoding") - response.headers['Vary'] = ", ".join(varies) - - response.headers['Content-Encoding'] = 'gzip' - response.body = compress(response.body, compress_level) - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] + + if ct not in mime_types: + # If the list of provided mime-types contains tokens + # such as 'text/*' or 'application/*+xml', + # we go through them and find the most appropriate one + # based on the given content-type. + # The pattern matching is only caring about the most + # common cases, as stated above, and doesn't support + # for extra parameters. + found = False + if '/' in ct: + ct_media_type, ct_sub_type = ct.split('/') + for mime_type in mime_types: + if '/' in mime_type: + media_type, sub_type = mime_type.split('/') + if ct_media_type == media_type: + if sub_type == '*': + found = True + break + elif '+' in sub_type and '+' in ct_sub_type: + ct_left, ct_right = ct_sub_type.split('+') + left, right = sub_type.split('+') + if left == '*' and ct_right == right: + found = True + break + + if not found: + if debug: + cherrypy.log('Content-Type %s not in mime_types %r' % + (ct, mime_types), context='TOOLS.GZIP') + return + + if debug: + cherrypy.log('Gzipping', context='TOOLS.GZIP') + # Return a generator that compresses the page + response.headers['Content-Encoding'] = 'gzip' + response.body = compress(response.body, compress_level) + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + return + + if debug: + cherrypy.log('No acceptable encoding found.', context='GZIP') cherrypy.HTTPError(406, "identity, gzip").set_response() + diff --git a/src/cherrypy/lib/gctools.py b/src/cherrypy/lib/gctools.py new file mode 100644 index 0000000000..183148b212 --- /dev/null +++ b/src/cherrypy/lib/gctools.py @@ -0,0 +1,214 @@ +import gc +import inspect +import os +import sys +import time + +try: + import objgraph +except ImportError: + objgraph = None + +import cherrypy +from cherrypy import _cprequest, _cpwsgi +from cherrypy.process.plugins import SimplePlugin + + +class ReferrerTree(object): + """An object which gathers all referrers of an object to a given depth.""" + + peek_length = 40 + + def __init__(self, ignore=None, maxdepth=2, maxparents=10): + self.ignore = ignore or [] + self.ignore.append(inspect.currentframe().f_back) + self.maxdepth = maxdepth + self.maxparents = maxparents + + def ascend(self, obj, depth=1): + """Return a nested list containing referrers of the given object.""" + depth += 1 + parents = [] + + # Gather all referrers in one step to minimize + # cascading references due to repr() logic. + refs = gc.get_referrers(obj) + self.ignore.append(refs) + if len(refs) > self.maxparents: + return [("[%s referrers]" % len(refs), [])] + + try: + ascendcode = self.ascend.__code__ + except AttributeError: + ascendcode = self.ascend.im_func.func_code + for parent in refs: + if inspect.isframe(parent) and parent.f_code is ascendcode: + continue + if parent in self.ignore: + continue + if depth <= self.maxdepth: + parents.append((parent, self.ascend(parent, depth))) + else: + parents.append((parent, [])) + + return parents + + def peek(self, s): + """Return s, restricted to a sane length.""" + if len(s) > (self.peek_length + 3): + half = self.peek_length // 2 + return s[:half] + '...' + s[-half:] + else: + return s + + def _format(self, obj, descend=True): + """Return a string representation of a single object.""" + if inspect.isframe(obj): + filename, lineno, func, context, index = inspect.getframeinfo(obj) + return "" % func + + if not descend: + return self.peek(repr(obj)) + + if isinstance(obj, dict): + return "{" + ", ".join(["%s: %s" % (self._format(k, descend=False), + self._format(v, descend=False)) + for k, v in obj.items()]) + "}" + elif isinstance(obj, list): + return "[" + ", ".join([self._format(item, descend=False) + for item in obj]) + "]" + elif isinstance(obj, tuple): + return "(" + ", ".join([self._format(item, descend=False) + for item in obj]) + ")" + + r = self.peek(repr(obj)) + if isinstance(obj, (str, int, float)): + return r + return "%s: %s" % (type(obj), r) + + def format(self, tree): + """Return a list of string reprs from a nested list of referrers.""" + output = [] + def ascend(branch, depth=1): + for parent, grandparents in branch: + output.append((" " * depth) + self._format(parent)) + if grandparents: + ascend(grandparents, depth + 1) + ascend(tree) + return output + + +def get_instances(cls): + return [x for x in gc.get_objects() if isinstance(x, cls)] + + +class RequestCounter(SimplePlugin): + + def start(self): + self.count = 0 + + def before_request(self): + self.count += 1 + + def after_request(self): + self.count -=1 +request_counter = RequestCounter(cherrypy.engine) +request_counter.subscribe() + + +def get_context(obj): + if isinstance(obj, _cprequest.Request): + return "path=%s;stage=%s" % (obj.path_info, obj.stage) + elif isinstance(obj, _cprequest.Response): + return "status=%s" % obj.status + elif isinstance(obj, _cpwsgi.AppResponse): + return "PATH_INFO=%s" % obj.environ.get('PATH_INFO', '') + elif hasattr(obj, "tb_lineno"): + return "tb_lineno=%s" % obj.tb_lineno + return "" + + +class GCRoot(object): + """A CherryPy page handler for testing reference leaks.""" + + classes = [(_cprequest.Request, 2, 2, + "Should be 1 in this request thread and 1 in the main thread."), + (_cprequest.Response, 2, 2, + "Should be 1 in this request thread and 1 in the main thread."), + (_cpwsgi.AppResponse, 1, 1, + "Should be 1 in this request thread only."), + ] + + def index(self): + return "Hello, world!" + index.exposed = True + + def stats(self): + output = ["Statistics:"] + + for trial in range(10): + if request_counter.count > 0: + break + time.sleep(0.5) + else: + output.append("\nNot all requests closed properly.") + + # gc_collect isn't perfectly synchronous, because it may + # break reference cycles that then take time to fully + # finalize. Call it thrice and hope for the best. + gc.collect() + gc.collect() + unreachable = gc.collect() + if unreachable: + if objgraph is not None: + final = objgraph.by_type('Nondestructible') + if final: + objgraph.show_backrefs(final, filename='finalizers.png') + + trash = {} + for x in gc.garbage: + trash[type(x)] = trash.get(type(x), 0) + 1 + if trash: + output.insert(0, "\n%s unreachable objects:" % unreachable) + trash = [(v, k) for k, v in trash.items()] + trash.sort() + for pair in trash: + output.append(" " + repr(pair)) + + # Check declared classes to verify uncollected instances. + # These don't have to be part of a cycle; they can be + # any objects that have unanticipated referrers that keep + # them from being collected. + allobjs = {} + for cls, minobj, maxobj, msg in self.classes: + allobjs[cls] = get_instances(cls) + + for cls, minobj, maxobj, msg in self.classes: + objs = allobjs[cls] + lenobj = len(objs) + if lenobj < minobj or lenobj > maxobj: + if minobj == maxobj: + output.append( + "\nExpected %s %r references, got %s." % + (minobj, cls, lenobj)) + else: + output.append( + "\nExpected %s to %s %r references, got %s." % + (minobj, maxobj, cls, lenobj)) + + for obj in objs: + if objgraph is not None: + ig = [id(objs), id(inspect.currentframe())] + fname = "graph_%s_%s.png" % (cls.__name__, id(obj)) + objgraph.show_backrefs( + obj, extra_ignore=ig, max_depth=4, too_many=20, + filename=fname, extra_info=get_context) + output.append("\nReferrers for %s (refcount=%s):" % + (repr(obj), sys.getrefcount(obj))) + t = ReferrerTree(ignore=[objs], maxdepth=3) + tree = t.ascend(obj) + output.extend(t.format(tree)) + + return "\n".join(output) + stats.exposed = True + diff --git a/src/cherrypy/lib/http.py b/src/cherrypy/lib/http.py index 82dfa5bf80..4661d69e28 100644 --- a/src/cherrypy/lib/http.py +++ b/src/cherrypy/lib/http.py @@ -1,405 +1,7 @@ -"""HTTP library functions.""" +import warnings +warnings.warn('cherrypy.lib.http has been deprecated and will be removed ' + 'in CherryPy 3.3 use cherrypy.lib.httputil instead.', + DeprecationWarning) -# This module contains functions for building an HTTP application -# framework: any one, not just one whose name starts with "Ch". ;) If you -# reference any modules from some popular framework inside *this* module, -# FuManChu will personally hang you up by your thumbs and submit you -# to a public caning. +from cherrypy.lib.httputil import * -from BaseHTTPServer import BaseHTTPRequestHandler -response_codes = BaseHTTPRequestHandler.responses.copy() - -# From http://www.cherrypy.org/ticket/361 -response_codes[500] = ('Internal Server Error', - 'The server encountered an unexpected condition ' - 'which prevented it from fulfilling the request.') -response_codes[503] = ('Service Unavailable', - 'The server is currently unable to handle the ' - 'request due to a temporary overloading or ' - 'maintenance of the server.') - - -import cgi -import re -from rfc822 import formatdate as HTTPDate - - -def urljoin(*atoms): - """Return the given path *atoms, joined into a single URL. - - This will correctly join a SCRIPT_NAME and PATH_INFO into the - original URL, even if either atom is blank. - """ - url = "/".join([x for x in atoms if x]) - while "//" in url: - url = url.replace("//", "/") - # Special-case the final url of "", and return "/" instead. - return url or "/" - -def protocol_from_http(protocol_str): - """Return a protocol tuple from the given 'HTTP/x.y' string.""" - return int(protocol_str[5]), int(protocol_str[7]) - -def get_ranges(headervalue, content_length): - """Return a list of (start, stop) indices from a Range header, or None. - - Each (start, stop) tuple will be composed of two ints, which are suitable - for use in a slicing operation. That is, the header "Range: bytes=3-6", - if applied against a Python string, is requesting resource[3:7]. This - function will return the list [(3, 7)]. - - If this function returns an empty list, you should return HTTP 416. - """ - - if not headervalue: - return None - - result = [] - bytesunit, byteranges = headervalue.split("=", 1) - for brange in byteranges.split(","): - start, stop = [x.strip() for x in brange.split("-", 1)] - if start: - if not stop: - stop = content_length - 1 - start, stop = map(int, (start, stop)) - if start >= content_length: - # From rfc 2616 sec 14.16: - # "If the server receives a request (other than one - # including an If-Range request-header field) with an - # unsatisfiable Range request-header field (that is, - # all of whose byte-range-spec values have a first-byte-pos - # value greater than the current length of the selected - # resource), it SHOULD return a response code of 416 - # (Requested range not satisfiable)." - continue - if stop < start: - # From rfc 2616 sec 14.16: - # "If the server ignores a byte-range-spec because it - # is syntactically invalid, the server SHOULD treat - # the request as if the invalid Range header field - # did not exist. (Normally, this means return a 200 - # response containing the full entity)." - return None - result.append((start, stop + 1)) - else: - if not stop: - # See rfc quote above. - return None - # Negative subscript (last N bytes) - result.append((content_length - int(stop), content_length)) - - return result - - -class HeaderElement(object): - """An element (with parameters) from an HTTP header's element list.""" - - def __init__(self, value, params=None): - self.value = value - if params is None: - params = {} - self.params = params - - def __unicode__(self): - p = [";%s=%s" % (k, v) for k, v in self.params.iteritems()] - return u"%s%s" % (self.value, "".join(p)) - - def __str__(self): - return str(self.__unicode__()) - - def parse(elementstr): - """Transform 'token;key=val' to ('token', {'key': 'val'}).""" - # Split the element into a value and parameters. The 'value' may - # be of the form, "token=token", but we don't split that here. - atoms = [x.strip() for x in elementstr.split(";") if x.strip()] - initial_value = atoms.pop(0).strip() - params = {} - for atom in atoms: - atom = [x.strip() for x in atom.split("=", 1) if x.strip()] - key = atom.pop(0) - if atom: - val = atom[0] - else: - val = "" - params[key] = val - return initial_value, params - parse = staticmethod(parse) - - def from_str(cls, elementstr): - """Construct an instance from a string of the form 'token;key=val'.""" - ival, params = cls.parse(elementstr) - return cls(ival, params) - from_str = classmethod(from_str) - - -q_separator = re.compile(r'; *q *=') - -class AcceptElement(HeaderElement): - """An element (with parameters) from an Accept* header's element list. - - AcceptElement objects are comparable; the more-preferred object will be - "less than" the less-preferred object. They are also therefore sortable; - if you sort a list of AcceptElement objects, they will be listed in - priority order; the most preferred value will be first. Yes, it should - have been the other way around, but it's too late to fix now. - """ - - def from_str(cls, elementstr): - qvalue = None - # The first "q" parameter (if any) separates the initial - # media-range parameter(s) (if any) from the accept-params. - atoms = q_separator.split(elementstr, 1) - media_range = atoms.pop(0).strip() - if atoms: - # The qvalue for an Accept header can have extensions. The other - # headers cannot, but it's easier to parse them as if they did. - qvalue = HeaderElement.from_str(atoms[0].strip()) - - media_type, params = cls.parse(media_range) - if qvalue is not None: - params["q"] = qvalue - return cls(media_type, params) - from_str = classmethod(from_str) - - def qvalue(self): - val = self.params.get("q", "1") - if isinstance(val, HeaderElement): - val = val.value - return float(val) - qvalue = property(qvalue, doc="The qvalue, or priority, of this value.") - - def __cmp__(self, other): - diff = cmp(other.qvalue, self.qvalue) - if diff == 0: - diff = cmp(str(other), str(self)) - return diff - - -def header_elements(fieldname, fieldvalue): - """Return a HeaderElement list from a comma-separated header str.""" - - if not fieldvalue: - return None - headername = fieldname.lower() - - result = [] - for element in fieldvalue.split(","): - if headername.startswith("accept") or headername == 'te': - hv = AcceptElement.from_str(element) - else: - hv = HeaderElement.from_str(element) - result.append(hv) - - result.sort() - return result - -def decode_TEXT(value): - """Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").""" - from email.Header import decode_header - atoms = decode_header(value) - decodedvalue = "" - for atom, charset in atoms: - if charset is not None: - atom = atom.decode(charset) - decodedvalue += atom - return decodedvalue - -def valid_status(status): - """Return legal HTTP status Code, Reason-phrase and Message. - - The status arg must be an int, or a str that begins with an int. - - If status is an int, or a str and no reason-phrase is supplied, - a default reason-phrase will be provided. - """ - - if not status: - status = 200 - - status = str(status) - parts = status.split(" ", 1) - if len(parts) == 1: - # No reason supplied. - code, = parts - reason = None - else: - code, reason = parts - reason = reason.strip() - - try: - code = int(code) - except ValueError: - raise ValueError("Illegal response status from server " - "(%s is non-numeric)." % repr(code)) - - if code < 100 or code > 599: - raise ValueError("Illegal response status from server " - "(%s is out of range)." % repr(code)) - - if code not in response_codes: - # code is unknown but not illegal - default_reason, message = "", "" - else: - default_reason, message = response_codes[code] - - if reason is None: - reason = default_reason - - return code, reason, message - - -image_map_pattern = re.compile(r"[0-9]+,[0-9]+") - -def parse_query_string(query_string, keep_blank_values=True): - """Build a params dictionary from a query_string. - - Duplicate key/value pairs in the provided query_string will be - returned as {'key': [val1, val2, ...]}. Single key/values will - be returned as strings: {'key': 'value'}. - """ - if image_map_pattern.match(query_string): - # Server-side image map. Map the coords to 'x' and 'y' - # (like CGI::Request does). - pm = query_string.split(",") - pm = {'x': int(pm[0]), 'y': int(pm[1])} - else: - pm = cgi.parse_qs(query_string, keep_blank_values) - for key, val in pm.items(): - if len(val) == 1: - pm[key] = val[0] - return pm - -def params_from_CGI_form(form): - params = {} - for key in form.keys(): - value_list = form[key] - if isinstance(value_list, list): - params[key] = [] - for item in value_list: - if item.filename is not None: - value = item # It's a file upload - else: - value = item.value # It's a regular field - params[key].append(value) - else: - if value_list.filename is not None: - value = value_list # It's a file upload - else: - value = value_list.value # It's a regular field - params[key] = value - return params - - -class CaseInsensitiveDict(dict): - """A case-insensitive dict subclass. - - Each key is changed on entry to str(key).title(). - """ - - def __getitem__(self, key): - return dict.__getitem__(self, str(key).title()) - - def __setitem__(self, key, value): - dict.__setitem__(self, str(key).title(), value) - - def __delitem__(self, key): - dict.__delitem__(self, str(key).title()) - - def __contains__(self, key): - return dict.__contains__(self, str(key).title()) - - def get(self, key, default=None): - return dict.get(self, str(key).title(), default) - - def has_key(self, key): - return dict.has_key(self, str(key).title()) - - def update(self, E): - for k in E.keys(): - self[str(k).title()] = E[k] - - def fromkeys(cls, seq, value=None): - newdict = cls() - for k in seq: - newdict[str(k).title()] = value - return newdict - fromkeys = classmethod(fromkeys) - - def setdefault(self, key, x=None): - key = str(key).title() - try: - return self[key] - except KeyError: - self[key] = x - return x - - def pop(self, key, default): - return dict.pop(self, str(key).title(), default) - - -class HeaderMap(CaseInsensitiveDict): - """A dict subclass for HTTP request and response headers. - - Each key is changed on entry to str(key).title(). This allows headers - to be case-insensitive and avoid duplicates. - - Values are header values (decoded according to RFC 2047 if necessary). - """ - - def elements(self, key): - """Return a list of HeaderElements for the given header (or None).""" - key = str(key).title() - h = self.get(key) - if h is None: - return [] - return header_elements(key, h) - - def output(self, protocol=(1, 1)): - """Transform self into a list of (name, value) tuples.""" - header_list = [] - for key, v in self.iteritems(): - if isinstance(v, unicode): - # HTTP/1.0 says, "Words of *TEXT may contain octets - # from character sets other than US-ASCII." and - # "Recipients of header field TEXT containing octets - # outside the US-ASCII character set may assume that - # they represent ISO-8859-1 characters." - try: - v = v.encode("iso-8859-1") - except UnicodeEncodeError: - if protocol >= (1, 1): - # Encode RFC-2047 TEXT - # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?="). - from email.Header import Header - v = Header(v, 'utf-8').encode() - else: - raise - else: - # This coercion should not take any time at all - # if value is already of type "str". - v = str(v) - header_list.append((key, v)) - return header_list - - - -class Host(object): - """An internet address. - - name should be the client's host name. If not available (because no DNS - lookup is performed), the IP address should be used instead. - """ - - ip = "0.0.0.0" - port = 80 - name = "unknown.tld" - - def __init__(self, ip, port, name=None): - self.ip = ip - self.port = port - if name is None: - name = ip - self.name = name - - def __repr__(self): - return "http.Host(%r, %r, %r)" % (self.ip, self.port, self.name) diff --git a/src/cherrypy/lib/httpauth.py b/src/cherrypy/lib/httpauth.py index f5d87d2b43..be87a785de 100644 --- a/src/cherrypy/lib/httpauth.py +++ b/src/cherrypy/lib/httpauth.py @@ -1,10 +1,9 @@ """ -httpauth modules defines functions to implement HTTP Digest Authentication (RFC 2617). +This module defines functions to implement HTTP Digest Authentication (:rfc:`2617`). This has full compliance with 'Digest' and 'Basic' authentication methods. In 'Digest' it supports both MD5 and MD5-sess algorithms. Usage: - First use 'doAuth' to request the client authentication for a certain resource. You should send an httplib.UNAUTHORIZED response to the client so he knows he has to authenticate itself. @@ -59,16 +58,9 @@ __all__ = ("digestAuth", "basicAuth", "doAuth", "checkResponse", "calculateNonce", "SUPPORTED_QOP") ################################################################################ - -try: - # Python 2.5+ - from hashlib import md5 -except ImportError: - from md5 import new as md5 - import time -import base64 -import urllib2 +from cherrypy._cpcompat import base64_decode, ntob, md5 +from cherrypy._cpcompat import parse_http_list, parse_keqv_list MD5 = "MD5" MD5_SESS = "MD5-sess" @@ -82,10 +74,10 @@ SUPPORTED_QOP = (AUTH, AUTH_INT) # doAuth # DIGEST_AUTH_ENCODERS = { - MD5: lambda val: md5(val).hexdigest(), + MD5: lambda val: md5(ntob(val)).hexdigest(), 'md5': lambda val:md5(val).hexdigest(), # Added by Kovid - MD5_SESS: lambda val: md5(val).hexdigest(), -# SHA: lambda val: sha(val).hexdigest(), + MD5_SESS: lambda val: md5(ntob(val)).hexdigest(), +# SHA: lambda val: sha.new(ntob(val)).hexdigest (), } def calculateNonce (realm, algorithm = MD5): @@ -137,32 +129,32 @@ def doAuth (realm): # def _parseDigestAuthorization (auth_params): # Convert the auth params to a dict - items = urllib2.parse_http_list (auth_params) - params = urllib2.parse_keqv_list (items) + items = parse_http_list(auth_params) + params = parse_keqv_list(items) # Now validate the params # Check for required parameters required = ["username", "realm", "nonce", "uri", "response"] for k in required: - if not params.has_key(k): + if k not in params: return None # If qop is sent then cnonce and nc MUST be present - if params.has_key("qop") and not (params.has_key("cnonce") \ - and params.has_key("nc")): + if "qop" in params and not ("cnonce" in params \ + and "nc" in params): return None # If qop is not sent, neither cnonce nor nc can be present - if (params.has_key("cnonce") or params.has_key("nc")) and \ - not params.has_key("qop"): + if ("cnonce" in params or "nc" in params) and \ + "qop" not in params: return None return params def _parseBasicAuthorization (auth_params): - username, password = base64.decodestring (auth_params).split (":", 1) + username, password = base64_decode(auth_params).split(":", 1) return {"username": username, "password": password} AUTH_SCHEMES = { @@ -343,19 +335,18 @@ def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs): other arguments that each implementation might need. If the response is of type 'Basic' then the function has the following - signature: + signature:: - checkBasicResponse (auth_map, password) -> bool + checkBasicResponse (auth_map, password) -> bool If the response is of type 'Digest' then the function has the following - signature: + signature:: - checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool + checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool The 'A1' argument is only used in MD5_SESS algorithm based responses. Check md5SessionKey() for more info. """ - global AUTH_RESPONSES checker = AUTH_RESPONSES[auth_map["auth_scheme"]] return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs) diff --git a/src/cherrypy/lib/httputil.py b/src/cherrypy/lib/httputil.py new file mode 100644 index 0000000000..5f77d54748 --- /dev/null +++ b/src/cherrypy/lib/httputil.py @@ -0,0 +1,506 @@ +"""HTTP library functions. + +This module contains functions for building an HTTP application +framework: any one, not just one whose name starts with "Ch". ;) If you +reference any modules from some popular framework inside *this* module, +FuManChu will personally hang you up by your thumbs and submit you +to a public caning. +""" + +from binascii import b2a_base64 +from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou, reversed, sorted +from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr, unicodestr, unquote_qs +response_codes = BaseHTTPRequestHandler.responses.copy() + +# From http://www.cherrypy.org/ticket/361 +response_codes[500] = ('Internal Server Error', + 'The server encountered an unexpected condition ' + 'which prevented it from fulfilling the request.') +response_codes[503] = ('Service Unavailable', + 'The server is currently unable to handle the ' + 'request due to a temporary overloading or ' + 'maintenance of the server.') + +import re +import urllib + + + +def urljoin(*atoms): + """Return the given path \*atoms, joined into a single URL. + + This will correctly join a SCRIPT_NAME and PATH_INFO into the + original URL, even if either atom is blank. + """ + url = "/".join([x for x in atoms if x]) + while "//" in url: + url = url.replace("//", "/") + # Special-case the final url of "", and return "/" instead. + return url or "/" + +def urljoin_bytes(*atoms): + """Return the given path *atoms, joined into a single URL. + + This will correctly join a SCRIPT_NAME and PATH_INFO into the + original URL, even if either atom is blank. + """ + url = ntob("/").join([x for x in atoms if x]) + while ntob("//") in url: + url = url.replace(ntob("//"), ntob("/")) + # Special-case the final url of "", and return "/" instead. + return url or ntob("/") + +def protocol_from_http(protocol_str): + """Return a protocol tuple from the given 'HTTP/x.y' string.""" + return int(protocol_str[5]), int(protocol_str[7]) + +def get_ranges(headervalue, content_length): + """Return a list of (start, stop) indices from a Range header, or None. + + Each (start, stop) tuple will be composed of two ints, which are suitable + for use in a slicing operation. That is, the header "Range: bytes=3-6", + if applied against a Python string, is requesting resource[3:7]. This + function will return the list [(3, 7)]. + + If this function returns an empty list, you should return HTTP 416. + """ + + if not headervalue: + return None + + result = [] + bytesunit, byteranges = headervalue.split("=", 1) + for brange in byteranges.split(","): + start, stop = [x.strip() for x in brange.split("-", 1)] + if start: + if not stop: + stop = content_length - 1 + start, stop = int(start), int(stop) + if start >= content_length: + # From rfc 2616 sec 14.16: + # "If the server receives a request (other than one + # including an If-Range request-header field) with an + # unsatisfiable Range request-header field (that is, + # all of whose byte-range-spec values have a first-byte-pos + # value greater than the current length of the selected + # resource), it SHOULD return a response code of 416 + # (Requested range not satisfiable)." + continue + if stop < start: + # From rfc 2616 sec 14.16: + # "If the server ignores a byte-range-spec because it + # is syntactically invalid, the server SHOULD treat + # the request as if the invalid Range header field + # did not exist. (Normally, this means return a 200 + # response containing the full entity)." + return None + result.append((start, stop + 1)) + else: + if not stop: + # See rfc quote above. + return None + # Negative subscript (last N bytes) + result.append((content_length - int(stop), content_length)) + + return result + + +class HeaderElement(object): + """An element (with parameters) from an HTTP header's element list.""" + + def __init__(self, value, params=None): + self.value = value + if params is None: + params = {} + self.params = params + + def __cmp__(self, other): + return cmp(self.value, other.value) + + def __lt__(self, other): + return self.value < other.value + + def __str__(self): + p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)] + return "%s%s" % (self.value, "".join(p)) + + def __bytes__(self): + return ntob(self.__str__()) + + def __unicode__(self): + return ntou(self.__str__()) + + def parse(elementstr): + """Transform 'token;key=val' to ('token', {'key': 'val'}).""" + # Split the element into a value and parameters. The 'value' may + # be of the form, "token=token", but we don't split that here. + atoms = [x.strip() for x in elementstr.split(";") if x.strip()] + if not atoms: + initial_value = '' + else: + initial_value = atoms.pop(0).strip() + params = {} + for atom in atoms: + atom = [x.strip() for x in atom.split("=", 1) if x.strip()] + key = atom.pop(0) + if atom: + val = atom[0] + else: + val = "" + params[key] = val + return initial_value, params + parse = staticmethod(parse) + + def from_str(cls, elementstr): + """Construct an instance from a string of the form 'token;key=val'.""" + ival, params = cls.parse(elementstr) + return cls(ival, params) + from_str = classmethod(from_str) + + +q_separator = re.compile(r'; *q *=') + +class AcceptElement(HeaderElement): + """An element (with parameters) from an Accept* header's element list. + + AcceptElement objects are comparable; the more-preferred object will be + "less than" the less-preferred object. They are also therefore sortable; + if you sort a list of AcceptElement objects, they will be listed in + priority order; the most preferred value will be first. Yes, it should + have been the other way around, but it's too late to fix now. + """ + + def from_str(cls, elementstr): + qvalue = None + # The first "q" parameter (if any) separates the initial + # media-range parameter(s) (if any) from the accept-params. + atoms = q_separator.split(elementstr, 1) + media_range = atoms.pop(0).strip() + if atoms: + # The qvalue for an Accept header can have extensions. The other + # headers cannot, but it's easier to parse them as if they did. + qvalue = HeaderElement.from_str(atoms[0].strip()) + + media_type, params = cls.parse(media_range) + if qvalue is not None: + params["q"] = qvalue + return cls(media_type, params) + from_str = classmethod(from_str) + + def qvalue(self): + val = self.params.get("q", "1") + if isinstance(val, HeaderElement): + val = val.value + return float(val) + qvalue = property(qvalue, doc="The qvalue, or priority, of this value.") + + def __cmp__(self, other): + diff = cmp(self.qvalue, other.qvalue) + if diff == 0: + diff = cmp(str(self), str(other)) + return diff + + def __lt__(self, other): + if self.qvalue == other.qvalue: + return str(self) < str(other) + else: + return self.qvalue < other.qvalue + + +def header_elements(fieldname, fieldvalue): + """Return a sorted HeaderElement list from a comma-separated header string.""" + if not fieldvalue: + return [] + + result = [] + for element in fieldvalue.split(","): + if fieldname.startswith("Accept") or fieldname == 'TE': + hv = AcceptElement.from_str(element) + else: + hv = HeaderElement.from_str(element) + result.append(hv) + + return list(reversed(sorted(result))) + +def decode_TEXT(value): + r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").""" + try: + # Python 3 + from email.header import decode_header + except ImportError: + from email.Header import decode_header + atoms = decode_header(value) + decodedvalue = "" + for atom, charset in atoms: + if charset is not None: + atom = atom.decode(charset) + decodedvalue += atom + return decodedvalue + +def valid_status(status): + """Return legal HTTP status Code, Reason-phrase and Message. + + The status arg must be an int, or a str that begins with an int. + + If status is an int, or a str and no reason-phrase is supplied, + a default reason-phrase will be provided. + """ + + if not status: + status = 200 + + status = str(status) + parts = status.split(" ", 1) + if len(parts) == 1: + # No reason supplied. + code, = parts + reason = None + else: + code, reason = parts + reason = reason.strip() + + try: + code = int(code) + except ValueError: + raise ValueError("Illegal response status from server " + "(%s is non-numeric)." % repr(code)) + + if code < 100 or code > 599: + raise ValueError("Illegal response status from server " + "(%s is out of range)." % repr(code)) + + if code not in response_codes: + # code is unknown but not illegal + default_reason, message = "", "" + else: + default_reason, message = response_codes[code] + + if reason is None: + reason = default_reason + + return code, reason, message + + +# NOTE: the parse_qs functions that follow are modified version of those +# in the python3.0 source - we need to pass through an encoding to the unquote +# method, but the default parse_qs function doesn't allow us to. These do. + +def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + Returns a dict, as G-d intended. + """ + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + d = {} + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = unquote_qs(nv[0], encoding) + value = unquote_qs(nv[1], encoding) + if name in d: + if not isinstance(d[name], list): + d[name] = [d[name]] + d[name].append(value) + else: + d[name] = value + return d + + +image_map_pattern = re.compile(r"[0-9]+,[0-9]+") + +def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'): + """Build a params dictionary from a query_string. + + Duplicate key/value pairs in the provided query_string will be + returned as {'key': [val1, val2, ...]}. Single key/values will + be returned as strings: {'key': 'value'}. + """ + if image_map_pattern.match(query_string): + # Server-side image map. Map the coords to 'x' and 'y' + # (like CGI::Request does). + pm = query_string.split(",") + pm = {'x': int(pm[0]), 'y': int(pm[1])} + else: + pm = _parse_qs(query_string, keep_blank_values, encoding=encoding) + return pm + + +class CaseInsensitiveDict(dict): + """A case-insensitive dict subclass. + + Each key is changed on entry to str(key).title(). + """ + + def __getitem__(self, key): + return dict.__getitem__(self, str(key).title()) + + def __setitem__(self, key, value): + dict.__setitem__(self, str(key).title(), value) + + def __delitem__(self, key): + dict.__delitem__(self, str(key).title()) + + def __contains__(self, key): + return dict.__contains__(self, str(key).title()) + + def get(self, key, default=None): + return dict.get(self, str(key).title(), default) + + if hasattr({}, 'has_key'): + def has_key(self, key): + return dict.has_key(self, str(key).title()) + + def update(self, E): + for k in E.keys(): + self[str(k).title()] = E[k] + + def fromkeys(cls, seq, value=None): + newdict = cls() + for k in seq: + newdict[str(k).title()] = value + return newdict + fromkeys = classmethod(fromkeys) + + def setdefault(self, key, x=None): + key = str(key).title() + try: + return self[key] + except KeyError: + self[key] = x + return x + + def pop(self, key, default): + return dict.pop(self, str(key).title(), default) + + +# TEXT = +# +# A CRLF is allowed in the definition of TEXT only as part of a header +# field continuation. It is expected that the folding LWS will be +# replaced with a single SP before interpretation of the TEXT value." +if nativestr == bytestr: + header_translate_table = ''.join([chr(i) for i in xrange(256)]) + header_translate_deletechars = ''.join([chr(i) for i in xrange(32)]) + chr(127) +else: + header_translate_table = None + header_translate_deletechars = bytes(range(32)) + bytes([127]) + + +class HeaderMap(CaseInsensitiveDict): + """A dict subclass for HTTP request and response headers. + + Each key is changed on entry to str(key).title(). This allows headers + to be case-insensitive and avoid duplicates. + + Values are header values (decoded according to :rfc:`2047` if necessary). + """ + + protocol=(1, 1) + encodings = ["ISO-8859-1"] + + # Someday, when http-bis is done, this will probably get dropped + # since few servers, clients, or intermediaries do it. But until then, + # we're going to obey the spec as is. + # "Words of *TEXT MAY contain characters from character sets other than + # ISO-8859-1 only when encoded according to the rules of RFC 2047." + use_rfc_2047 = True + + def elements(self, key): + """Return a sorted list of HeaderElements for the given header.""" + key = str(key).title() + value = self.get(key) + return header_elements(key, value) + + def values(self, key): + """Return a sorted list of HeaderElement.value for the given header.""" + return [e.value for e in self.elements(key)] + + def output(self): + """Transform self into a list of (name, value) tuples.""" + header_list = [] + for k, v in self.items(): + if isinstance(k, unicodestr): + k = self.encode(k) + + if not isinstance(v, basestring): + v = str(v) + + if isinstance(v, unicodestr): + v = self.encode(v) + + # See header_translate_* constants above. + # Replace only if you really know what you're doing. + k = k.translate(header_translate_table, header_translate_deletechars) + v = v.translate(header_translate_table, header_translate_deletechars) + + header_list.append((k, v)) + return header_list + + def encode(self, v): + """Return the given header name or value, encoded for HTTP output.""" + for enc in self.encodings: + try: + return v.encode(enc) + except UnicodeEncodeError: + continue + + if self.protocol == (1, 1) and self.use_rfc_2047: + # Encode RFC-2047 TEXT + # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?="). + # We do our own here instead of using the email module + # because we never want to fold lines--folding has + # been deprecated by the HTTP working group. + v = b2a_base64(v.encode('utf-8')) + return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?=')) + + raise ValueError("Could not encode header part %r using " + "any of the encodings %r." % + (v, self.encodings)) + + +class Host(object): + """An internet address. + + name + Should be the client's host name. If not available (because no DNS + lookup is performed), the IP address should be used instead. + + """ + + ip = "0.0.0.0" + port = 80 + name = "unknown.tld" + + def __init__(self, ip, port, name=None): + self.ip = ip + self.port = port + if name is None: + name = ip + self.name = name + + def __repr__(self): + return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name) diff --git a/src/cherrypy/lib/jsontools.py b/src/cherrypy/lib/jsontools.py new file mode 100644 index 0000000000..209257914f --- /dev/null +++ b/src/cherrypy/lib/jsontools.py @@ -0,0 +1,87 @@ +import sys +import cherrypy +from cherrypy._cpcompat import basestring, ntou, json, json_encode, json_decode + +def json_processor(entity): + """Read application/json data into request.json.""" + if not entity.headers.get(ntou("Content-Length"), ntou("")): + raise cherrypy.HTTPError(411) + + body = entity.fp.read() + try: + cherrypy.serving.request.json = json_decode(body.decode('utf-8')) + except ValueError: + raise cherrypy.HTTPError(400, 'Invalid JSON document') + +def json_in(content_type=[ntou('application/json'), ntou('text/javascript')], + force=True, debug=False, processor = json_processor): + """Add a processor to parse JSON request entities: + The default processor places the parsed data into request.json. + + Incoming request entities which match the given content_type(s) will + be deserialized from JSON to the Python equivalent, and the result + stored at cherrypy.request.json. The 'content_type' argument may + be a Content-Type string or a list of allowable Content-Type strings. + + If the 'force' argument is True (the default), then entities of other + content types will not be allowed; "415 Unsupported Media Type" is + raised instead. + + Supply your own processor to use a custom decoder, or to handle the parsed + data differently. The processor can be configured via + tools.json_in.processor or via the decorator method. + + Note that the deserializer requires the client send a Content-Length + request header, or it will raise "411 Length Required". If for any + other reason the request entity cannot be deserialized from JSON, + it will raise "400 Bad Request: Invalid JSON document". + + You must be using Python 2.6 or greater, or have the 'simplejson' + package importable; otherwise, ValueError is raised during processing. + """ + request = cherrypy.serving.request + if isinstance(content_type, basestring): + content_type = [content_type] + + if force: + if debug: + cherrypy.log('Removing body processors %s' % + repr(request.body.processors.keys()), 'TOOLS.JSON_IN') + request.body.processors.clear() + request.body.default_proc = cherrypy.HTTPError( + 415, 'Expected an entity of content type %s' % + ', '.join(content_type)) + + for ct in content_type: + if debug: + cherrypy.log('Adding body processor for %s' % ct, 'TOOLS.JSON_IN') + request.body.processors[ct] = processor + +def json_handler(*args, **kwargs): + value = cherrypy.serving.request._json_inner_handler(*args, **kwargs) + return json_encode(value) + +def json_out(content_type='application/json', debug=False, handler=json_handler): + """Wrap request.handler to serialize its output to JSON. Sets Content-Type. + + If the given content_type is None, the Content-Type response header + is not set. + + Provide your own handler to use a custom encoder. For example + cherrypy.config['tools.json_out.handler'] = , or + @json_out(handler=function). + + You must be using Python 2.6 or greater, or have the 'simplejson' + package importable; otherwise, ValueError is raised during processing. + """ + request = cherrypy.serving.request + if debug: + cherrypy.log('Replacing %s with JSON handler' % request.handler, + 'TOOLS.JSON_OUT') + request._json_inner_handler = request.handler + request.handler = handler + if content_type is not None: + if debug: + cherrypy.log('Setting Content-Type to %s' % content_type, 'TOOLS.JSON_OUT') + cherrypy.serving.response.headers['Content-Type'] = content_type + diff --git a/src/cherrypy/lib/profiler.py b/src/cherrypy/lib/profiler.py index 704fec47a5..785d58a302 100644 --- a/src/cherrypy/lib/profiler.py +++ b/src/cherrypy/lib/profiler.py @@ -3,7 +3,7 @@ CherryPy users ============== -You can profile any of your pages as follows: +You can profile any of your pages as follows:: from cherrypy.lib import profiler @@ -19,25 +19,23 @@ You can profile any of your pages as follows: cherrypy.tree.mount(Root()) - You can also turn on profiling for all requests -using the make_app function as WSGI middleware. - +using the ``make_app`` function as WSGI middleware. CherryPy developers =================== This module can be used whenever you make changes to CherryPy, to get a quick sanity-check on overall CP performance. Use the -"--profile" flag when running the test suite. Then, use the serve() +``--profile`` flag when running the test suite. Then, use the ``serve()`` function to browse the results in a web browser. If you run this -module from the command line, it will call serve() for you. +module from the command line, it will call ``serve()`` for you. """ -# Make profiler output more readable by adding __init__ modules' parents. def new_func_strip_path(func_name): + """Make profiler output more readable by adding ``__init__`` modules' parents""" filename, line, name = func_name if filename.endswith("__init__.py"): return os.path.basename(filename[:-12]) + filename[-12:], line, name @@ -50,21 +48,12 @@ try: except ImportError: profile = None pstats = None - import warnings - msg = ("Your installation of Python does not have a profile module. " - "If you're on Debian, you can apt-get python2.4-profiler from " - "non-free in a separate step. See http://www.cherrypy.org/wiki/" - "ProfilingOnDebian for details.") - warnings.warn(msg) import os, os.path import sys +import warnings -try: - import cStringIO as StringIO -except ImportError: - import StringIO - +from cherrypy._cpcompat import BytesIO _count = 0 @@ -88,13 +77,15 @@ class Profiler(object): return result def statfiles(self): - """statfiles() -> list of available profiles.""" + """:rtype: list of available profiles. + """ return [f for f in os.listdir(self.path) if f.startswith("cp_") and f.endswith(".prof")] def stats(self, filename, sortby='cumulative'): - """stats(index) -> output of print_stats() for the given profile.""" - sio = StringIO.StringIO() + """:rtype stats(index): output of print_stats() for the given profile. + """ + sio = BytesIO() if sys.version_info >= (2, 5): s = pstats.Stats(os.path.join(self.path, filename), stream=sio) s.strip_dirs() @@ -162,13 +153,25 @@ class make_app: def __init__(self, nextapp, path=None, aggregate=False): """Make a WSGI middleware app which wraps 'nextapp' with profiling. - nextapp: the WSGI application to wrap, usually an instance of + nextapp + the WSGI application to wrap, usually an instance of cherrypy.Application. - path: where to dump the profiling output. - aggregate: if True, profile data for all HTTP requests will go in + + path + where to dump the profiling output. + + aggregate + if True, profile data for all HTTP requests will go in a single file. If False (the default), each HTTP request will dump its profile data into a separate file. + """ + if profile is None or pstats is None: + msg = ("Your installation of Python does not have a profile module. " + "If you're on Debian, try `sudo apt-get install python-profiler`. " + "See http://www.cherrypy.org/wiki/ProfilingOnDebian for details.") + warnings.warn(msg) + self.nextapp = nextapp self.aggregate = aggregate if aggregate: @@ -186,6 +189,12 @@ class make_app: def serve(path=None, port=8080): + if profile is None or pstats is None: + msg = ("Your installation of Python does not have a profile module. " + "If you're on Debian, try `sudo apt-get install python-profiler`. " + "See http://www.cherrypy.org/wiki/ProfilingOnDebian for details.") + warnings.warn(msg) + import cherrypy cherrypy.config.update({'server.socket_port': int(port), 'server.thread_pool': 10, diff --git a/src/cherrypy/lib/reprconf.py b/src/cherrypy/lib/reprconf.py new file mode 100644 index 0000000000..ba8ff51e41 --- /dev/null +++ b/src/cherrypy/lib/reprconf.py @@ -0,0 +1,485 @@ +"""Generic configuration system using unrepr. + +Configuration data may be supplied as a Python dictionary, as a filename, +or as an open file object. When you supply a filename or file, Python's +builtin ConfigParser is used (with some extensions). + +Namespaces +---------- + +Configuration keys are separated into namespaces by the first "." in the key. + +The only key that cannot exist in a namespace is the "environment" entry. +This special entry 'imports' other config entries from a template stored in +the Config.environments dict. + +You can define your own namespaces to be called when new config is merged +by adding a named handler to Config.namespaces. The name can be any string, +and the handler must be either a callable or a context manager. +""" + +try: + # Python 3.0+ + from configparser import ConfigParser +except ImportError: + from ConfigParser import ConfigParser + +try: + set +except NameError: + from sets import Set as set + +try: + basestring +except NameError: + basestring = str + +try: + # Python 3 + import builtins +except ImportError: + # Python 2 + import __builtin__ as builtins + +import operator as _operator +import sys + +def as_dict(config): + """Return a dict from 'config' whether it is a dict, file, or filename.""" + if isinstance(config, basestring): + config = Parser().dict_from_file(config) + elif hasattr(config, 'read'): + config = Parser().dict_from_file(config) + return config + + +class NamespaceSet(dict): + """A dict of config namespace names and handlers. + + Each config entry should begin with a namespace name; the corresponding + namespace handler will be called once for each config entry in that + namespace, and will be passed two arguments: the config key (with the + namespace removed) and the config value. + + Namespace handlers may be any Python callable; they may also be + Python 2.5-style 'context managers', in which case their __enter__ + method should return a callable to be used as the handler. + See cherrypy.tools (the Toolbox class) for an example. + """ + + def __call__(self, config): + """Iterate through config and pass it to each namespace handler. + + config + A flat dict, where keys use dots to separate + namespaces, and values are arbitrary. + + The first name in each config key is used to look up the corresponding + namespace handler. For example, a config entry of {'tools.gzip.on': v} + will call the 'tools' namespace handler with the args: ('gzip.on', v) + """ + # Separate the given config into namespaces + ns_confs = {} + for k in config: + if "." in k: + ns, name = k.split(".", 1) + bucket = ns_confs.setdefault(ns, {}) + bucket[name] = config[k] + + # I chose __enter__ and __exit__ so someday this could be + # rewritten using Python 2.5's 'with' statement: + # for ns, handler in self.iteritems(): + # with handler as callable: + # for k, v in ns_confs.get(ns, {}).iteritems(): + # callable(k, v) + for ns, handler in self.items(): + exit = getattr(handler, "__exit__", None) + if exit: + callable = handler.__enter__() + no_exc = True + try: + try: + for k, v in ns_confs.get(ns, {}).items(): + callable(k, v) + except: + # The exceptional case is handled here + no_exc = False + if exit is None: + raise + if not exit(*sys.exc_info()): + raise + # The exception is swallowed if exit() returns true + finally: + # The normal and non-local-goto cases are handled here + if no_exc and exit: + exit(None, None, None) + else: + for k, v in ns_confs.get(ns, {}).items(): + handler(k, v) + + def __repr__(self): + return "%s.%s(%s)" % (self.__module__, self.__class__.__name__, + dict.__repr__(self)) + + def __copy__(self): + newobj = self.__class__() + newobj.update(self) + return newobj + copy = __copy__ + + +class Config(dict): + """A dict-like set of configuration data, with defaults and namespaces. + + May take a file, filename, or dict. + """ + + defaults = {} + environments = {} + namespaces = NamespaceSet() + + def __init__(self, file=None, **kwargs): + self.reset() + if file is not None: + self.update(file) + if kwargs: + self.update(kwargs) + + def reset(self): + """Reset self to default values.""" + self.clear() + dict.update(self, self.defaults) + + def update(self, config): + """Update self from a dict, file or filename.""" + if isinstance(config, basestring): + # Filename + config = Parser().dict_from_file(config) + elif hasattr(config, 'read'): + # Open file object + config = Parser().dict_from_file(config) + else: + config = config.copy() + self._apply(config) + + def _apply(self, config): + """Update self from a dict.""" + which_env = config.get('environment') + if which_env: + env = self.environments[which_env] + for k in env: + if k not in config: + config[k] = env[k] + + dict.update(self, config) + self.namespaces(config) + + def __setitem__(self, k, v): + dict.__setitem__(self, k, v) + self.namespaces({k: v}) + + +class Parser(ConfigParser): + """Sub-class of ConfigParser that keeps the case of options and that + raises an exception if the file cannot be read. + """ + + def optionxform(self, optionstr): + return optionstr + + def read(self, filenames): + if isinstance(filenames, basestring): + filenames = [filenames] + for filename in filenames: + # try: + # fp = open(filename) + # except IOError: + # continue + fp = open(filename) + try: + self._read(fp, filename) + finally: + fp.close() + + def as_dict(self, raw=False, vars=None): + """Convert an INI file to a dictionary""" + # Load INI file into a dict + result = {} + for section in self.sections(): + if section not in result: + result[section] = {} + for option in self.options(section): + value = self.get(section, option, raw=raw, vars=vars) + try: + value = unrepr(value) + except Exception: + x = sys.exc_info()[1] + msg = ("Config error in section: %r, option: %r, " + "value: %r. Config values must be valid Python." % + (section, option, value)) + raise ValueError(msg, x.__class__.__name__, x.args) + result[section][option] = value + return result + + def dict_from_file(self, file): + if hasattr(file, 'read'): + self.readfp(file) + else: + self.read(file) + return self.as_dict() + + +# public domain "unrepr" implementation, found on the web and then improved. + + +class _Builder2: + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise TypeError("unrepr does not recognize %s" % + repr(o.__class__.__name__)) + return m(o) + + def astnode(self, s): + """Return a Python2 ast Node compiled from a string.""" + try: + import compiler + except ImportError: + # Fallback to eval when compiler package is not available, + # e.g. IronPython 1.0. + return eval(s) + + p = compiler.parse("__tempvalue__ = " + s) + return p.getChildren()[1].getChildren()[0].getChildren()[1] + + def build_Subscript(self, o): + expr, flags, subs = o.getChildren() + expr = self.build(expr) + subs = self.build(subs) + return expr[subs] + + def build_CallFunc(self, o): + children = map(self.build, o.getChildren()) + callee = children.pop(0) + kwargs = children.pop() or {} + starargs = children.pop() or () + args = tuple(children) + tuple(starargs) + return callee(*args, **kwargs) + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + name = o.name + if name == 'None': + return None + if name == 'True': + return True + if name == 'False': + return False + + # See if the Name is a package or module. If it is, import it. + try: + return modules(name) + except ImportError: + pass + + # See if the Name is in builtins. + try: + return getattr(builtins, name) + except AttributeError: + pass + + raise TypeError("unrepr could not resolve the name %s" % repr(name)) + + def build_Add(self, o): + left, right = map(self.build, o.getChildren()) + return left + right + + def build_Mul(self, o): + left, right = map(self.build, o.getChildren()) + return left * right + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_NoneType(self, o): + return None + + def build_UnarySub(self, o): + return -self.build(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build(o.getChildren()[0]) + + +class _Builder3: + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise TypeError("unrepr does not recognize %s" % + repr(o.__class__.__name__)) + return m(o) + + def astnode(self, s): + """Return a Python3 ast Node compiled from a string.""" + try: + import ast + except ImportError: + # Fallback to eval when ast package is not available, + # e.g. IronPython 1.0. + return eval(s) + + p = ast.parse("__tempvalue__ = " + s) + return p.body[0].value + + def build_Subscript(self, o): + return self.build(o.value)[self.build(o.slice)] + + def build_Index(self, o): + return self.build(o.value) + + def build_Call(self, o): + callee = self.build(o.func) + + if o.args is None: + args = () + else: + args = tuple([self.build(a) for a in o.args]) + + if o.starargs is None: + starargs = () + else: + starargs = self.build(o.starargs) + + if o.kwargs is None: + kwargs = {} + else: + kwargs = self.build(o.kwargs) + + return callee(*(args + starargs), **kwargs) + + def build_List(self, o): + return list(map(self.build, o.elts)) + + def build_Str(self, o): + return o.s + + def build_Num(self, o): + return o.n + + def build_Dict(self, o): + return dict([(self.build(k), self.build(v)) + for k, v in zip(o.keys, o.values)]) + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + name = o.id + if name == 'None': + return None + if name == 'True': + return True + if name == 'False': + return False + + # See if the Name is a package or module. If it is, import it. + try: + return modules(name) + except ImportError: + pass + + # See if the Name is in builtins. + try: + import builtins + return getattr(builtins, name) + except AttributeError: + pass + + raise TypeError("unrepr could not resolve the name %s" % repr(name)) + + def build_UnaryOp(self, o): + op, operand = map(self.build, [o.op, o.operand]) + return op(operand) + + def build_BinOp(self, o): + left, op, right = map(self.build, [o.left, o.op, o.right]) + return op(left, right) + + def build_Add(self, o): + return _operator.add + + def build_Mult(self, o): + return _operator.mul + + def build_USub(self, o): + return _operator.neg + + def build_Attribute(self, o): + parent = self.build(o.value) + return getattr(parent, o.attr) + + def build_NoneType(self, o): + return None + + +def unrepr(s): + """Return a Python object compiled from a string.""" + if not s: + return s + if sys.version_info < (3, 0): + b = _Builder2() + else: + b = _Builder3() + obj = b.astnode(s) + return b.build(obj) + + +def modules(modulePath): + """Load a module and retrieve a reference to that module.""" + try: + mod = sys.modules[modulePath] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(modulePath, globals(), locals(), ['']) + return mod + +def attributes(full_attribute_name): + """Load a module and retrieve an attribute of that module.""" + + # Parse out the path, module, and attribute + last_dot = full_attribute_name.rfind(".") + attr_name = full_attribute_name[last_dot + 1:] + mod_path = full_attribute_name[:last_dot] + + mod = modules(mod_path) + # Let an AttributeError propagate outward. + try: + attr = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + # Return a reference to the attribute. + return attr + + diff --git a/src/cherrypy/lib/safemime.py b/src/cherrypy/lib/safemime.py deleted file mode 100644 index 0d13ae9a91..0000000000 --- a/src/cherrypy/lib/safemime.py +++ /dev/null @@ -1,128 +0,0 @@ -import cherrypy - - -class MultipartWrapper(object): - """Wraps a file-like object, returning '' when Content-Length is reached. - - The cgi module's logic for reading multipart MIME messages doesn't - allow the parts to know when the Content-Length for the entire message - has been reached, and doesn't allow for multipart-MIME messages that - omit the trailing CRLF (Flash 8's FileReference.upload(url), for example, - does this). The read_lines_to_outerboundary function gets stuck in a loop - until the socket times out. - - This rfile wrapper simply monitors the incoming stream. When a read is - attempted past the Content-Length, it returns an empty string rather - than timing out (of course, if the last read *overlaps* the C-L, you'll - get the last bit of data up to C-L, and then the next read will return - an empty string). - """ - - def __init__(self, rfile, clen): - self.rfile = rfile - self.clen = clen - self.bytes_read = 0 - - def read(self, size = None): - if self.clen: - # Return '' if we've read all the data. - if self.bytes_read >= self.clen: - return '' - - # Reduce 'size' if it's over our limit. - new_bytes_read = self.bytes_read + size - if new_bytes_read > self.clen: - size = self.clen - self.bytes_read - - data = self.rfile.read(size) - self.bytes_read += len(data) - return data - - def readline(self, size = None): - if size is not None: - if self.clen: - # Return '' if we've read all the data. - if self.bytes_read >= self.clen: - return '' - - # Reduce 'size' if it's over our limit. - new_bytes_read = self.bytes_read + size - if new_bytes_read > self.clen: - size = self.clen - self.bytes_read - - data = self.rfile.readline(size) - self.bytes_read += len(data) - return data - - # User didn't specify a size ... - # We read the line in chunks to make sure it's not a 100MB line ! - res = [] - size = 256 - while True: - if self.clen: - # Return if we've read all the data. - if self.bytes_read >= self.clen: - return ''.join(res) - - # Reduce 'size' if it's over our limit. - new_bytes_read = self.bytes_read + size - if new_bytes_read > self.clen: - size = self.clen - self.bytes_read - - data = self.rfile.readline(size) - self.bytes_read += len(data) - res.append(data) - # See http://www.cherrypy.org/ticket/421 - if len(data) < size or data[-1:] == "\n": - return ''.join(res) - - def readlines(self, sizehint = 0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self.rfile - - def next(self): - if self.clen: - # Return '' if we've read all the data. - if self.bytes_read >= self.clen: - return '' - - data = self.rfile.next() - self.bytes_read += len(data) - return data - - -def safe_multipart(flash_only=False): - """Wrap request.rfile in a reader that won't crash on no trailing CRLF.""" - h = cherrypy.request.headers - if not h.get('Content-Type','').startswith('multipart/'): - return - if flash_only and not 'Shockwave Flash' in h.get('User-Agent', ''): - return - - clen = h.get('Content-Length', '0') - try: - clen = int(clen) - except ValueError: - return - cherrypy.request.rfile = MultipartWrapper(cherrypy.request.rfile, clen) - -def init(): - """Create a Tool for safe_multipart and add it to cherrypy.tools.""" - cherrypy.tools.safe_multipart = cherrypy.Tool('before_request_body', - safe_multipart) - diff --git a/src/cherrypy/lib/sessions.py b/src/cherrypy/lib/sessions.py index 326e72c2b2..1cd651c4f2 100644 --- a/src/cherrypy/lib/sessions.py +++ b/src/cherrypy/lib/sessions.py @@ -1,32 +1,99 @@ """Session implementation for CherryPy. -We use cherrypy.request to store some convenient variables as -well as data about the session for the current request. Instead of -polluting cherrypy.request we use a Session object bound to -cherrypy.session to store these variables. +You need to edit your config file to use sessions. Here's an example:: + + [/] + tools.sessions.on = True + tools.sessions.storage_type = "file" + tools.sessions.storage_path = "/home/site/sessions" + tools.sessions.timeout = 60 + +This sets the session to be stored in files in the directory /home/site/sessions, +and the session timeout to 60 minutes. If you omit ``storage_type`` the sessions +will be saved in RAM. ``tools.sessions.on`` is the only required line for +working sessions, the rest are optional. + +By default, the session ID is passed in a cookie, so the client's browser must +have cookies enabled for your site. + +To set data for the current session, use +``cherrypy.session['fieldname'] = 'fieldvalue'``; +to get data use ``cherrypy.session.get('fieldname')``. + +================ +Locking sessions +================ + +By default, the ``'locking'`` mode of sessions is ``'implicit'``, which means +the session is locked early and unlocked late. If you want to control when the +session data is locked and unlocked, set ``tools.sessions.locking = 'explicit'``. +Then call ``cherrypy.session.acquire_lock()`` and ``cherrypy.session.release_lock()``. +Regardless of which mode you use, the session is guaranteed to be unlocked when +the request is complete. + +================= +Expiring Sessions +================= + +You can force a session to expire with :func:`cherrypy.lib.sessions.expire`. +Simply call that function at the point you want the session to expire, and it +will cause the session cookie to expire client-side. + +=========================== +Session Fixation Protection +=========================== + +If CherryPy receives, via a request cookie, a session id that it does not +recognize, it will reject that id and create a new one to return in the +response cookie. This `helps prevent session fixation attacks +`_. +However, CherryPy "recognizes" a session id by looking up the saved session +data for that id. Therefore, if you never save any session data, +**you will get a new session id for every request**. + +================ +Sharing Sessions +================ + +If you run multiple instances of CherryPy (for example via mod_python behind +Apache prefork), you most likely cannot use the RAM session backend, since each +instance of CherryPy will have its own memory space. Use a different backend +instead, and verify that all instances are pointing at the same file or db +location. Alternately, you might try a load balancer which makes sessions +"sticky". Google is your friend, there. + +================ +Expiration Dates +================ + +The response cookie will possess an expiration date to inform the client at +which point to stop sending the cookie back in requests. If the server time +and client time differ, expect sessions to be unreliable. **Make sure the +system time of your server is accurate**. + +CherryPy defaults to a 60-minute session timeout, which also applies to the +cookie which is sent to the client. Unfortunately, some versions of Safari +("4 public beta" on Windows XP at least) appear to have a bug in their parsing +of the GMT expiration date--they appear to interpret the date as one hour in +the past. Sixty minutes minus one hour is pretty close to zero, so you may +experience this bug as a new session id for every request, unless the requests +are less than one second apart. To fix, try increasing the session.timeout. + +On the other extreme, some users report Firefox sending cookies after their +expiration date, although this was on a system with an inaccurate system time. +Maybe FF doesn't trust system time. """ import datetime import os -try: - import cPickle as pickle -except ImportError: - import pickle -import random - -try: - # Python 2.5+ - from hashlib import sha1 as sha -except ImportError: - from sha import new as sha - import time import threading import types from warnings import warn import cherrypy -from cherrypy.lib import http +from cherrypy._cpcompat import copyitems, pickle, random20, unicodestr +from cherrypy.lib import httputil missing = object() @@ -34,59 +101,90 @@ missing = object() class Session(object): """A CherryPy dict-like Session object (one per request).""" - __metaclass__ = cherrypy._AttributeDocstrings - _id = None - id_observers = None - id_observers__doc = "A list of callbacks to which to pass new id's." - id__doc = "The current session ID." + id_observers = None + "A list of callbacks to which to pass new id's." + def _get_id(self): return self._id def _set_id(self, value): self._id = value for o in self.id_observers: o(value) - id = property(_get_id, _set_id, doc=id__doc) + id = property(_get_id, _set_id, doc="The current session ID.") timeout = 60 - timeout__doc = "Number of minutes after which to delete session data." + "Number of minutes after which to delete session data." locked = False - locked__doc = """ + """ If True, this session instance has exclusive read/write access to session data.""" loaded = False - loaded__doc = """ + """ If True, data has been retrieved from storage. This should happen automatically on the first attempt to access session data.""" clean_thread = None - clean_thread__doc = "Class-level Monitor which calls self.clean_up." + "Class-level Monitor which calls self.clean_up." clean_freq = 5 - clean_freq__doc = "The poll rate for expired session cleanup in minutes." + "The poll rate for expired session cleanup in minutes." + + originalid = None + "The session id passed by the client. May be missing or unsafe." + + missing = False + "True if the session requested by the client did not exist." + + regenerated = False + """ + True if the application called session.regenerate(). This is not set by + internal calls to regenerate the session id.""" + + debug=False def __init__(self, id=None, **kwargs): self.id_observers = [] self._data = {} - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(self, k, v) + self.originalid = id + self.missing = False if id is None: - self.regenerate() + if self.debug: + cherrypy.log('No id given; making a new one', 'TOOLS.SESSIONS') + self._regenerate() else: self.id = id if not self._exists(): + if self.debug: + cherrypy.log('Expired or malicious session %r; ' + 'making a new one' % id, 'TOOLS.SESSIONS') # Expired or malicious session. Make a new one. # See http://www.cherrypy.org/ticket/709. self.id = None - self.regenerate() + self.missing = True + self._regenerate() + + def now(self): + """Generate the session specific concept of 'now'. + + Other session providers can override this to use alternative, + possibly timezone aware, versions of 'now'. + """ + return datetime.datetime.now() def regenerate(self): """Replace the current session (with a new id).""" + self.regenerated = True + self._regenerate() + + def _regenerate(self): if self.id is not None: self.delete() @@ -108,26 +206,21 @@ class Session(object): """Clean up expired sessions.""" pass - try: - os.urandom(20) - except (AttributeError, NotImplementedError): - # os.urandom not available until Python 2.4. Fall back to random.random. - def generate_id(self): - """Return a new session id.""" - return sha('%s' % random.random()).hexdigest() - else: - def generate_id(self): - """Return a new session id.""" - return os.urandom(20).encode('hex') + def generate_id(self): + """Return a new session id.""" + return random20() def save(self): """Save session data.""" try: # If session data has never been loaded then it's never been - # accessed: no need to delete it + # accessed: no need to save it if self.loaded: t = datetime.timedelta(seconds = self.timeout * 60) - expiration_time = datetime.datetime.now() + t + expiration_time = self.now() + t + if self.debug: + cherrypy.log('Saving with expiry %s' % expiration_time, + 'TOOLS.SESSIONS') self._save(expiration_time) finally: @@ -139,8 +232,9 @@ class Session(object): """Copy stored session data into this session instance.""" data = self._load() # data is either None or a tuple (session_data, expiration_time) - if data is None or data[1] < datetime.datetime.now(): - # Expired session: flush session data + if data is None or data[1] < self.now(): + if self.debug: + cherrypy.log('Expired session, flushing data', 'TOOLS.SESSIONS') self._data = {} else: self._data = data[0] @@ -153,7 +247,8 @@ class Session(object): # clean_up is in instancemethod and not a classmethod, # so that tool config can be accessed inside the method. t = cherrypy.process.plugins.Monitor( - cherrypy.engine, self.clean_up, self.clean_freq * 60) + cherrypy.engine, self.clean_up, self.clean_freq * 60, + name='Session cleanup') t.subscribe() cls.clean_thread = t t.start() @@ -189,10 +284,11 @@ class Session(object): if not self.loaded: self.load() return key in self._data - def has_key(self, key): - """D.has_key(k) -> True if D has a key k, else False.""" - if not self.loaded: self.load() - return self._data.has_key(key) + if hasattr({}, 'has_key'): + def has_key(self, key): + """D.has_key(k) -> True if D has a key k, else False.""" + if not self.loaded: self.load() + return key in self._data def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" @@ -238,9 +334,9 @@ class RamSession(Session): def clean_up(self): """Clean up expired sessions.""" - now = datetime.datetime.now() - for id, (data, expiration_time) in self.cache.items(): - if expiration_time < now: + now = self.now() + for id, (data, expiration_time) in copyitems(self.cache): + if expiration_time <= now: try: del self.cache[id] except KeyError: @@ -250,6 +346,11 @@ class RamSession(Session): except KeyError: pass + # added to remove obsolete lock objects + for id in list(self.locks): + if id not in self.cache: + self.locks.pop(id, None) + def _exists(self): return self.id in self.cache @@ -260,7 +361,7 @@ class RamSession(Session): self.cache[self.id] = (self._data, expiration_time) def _delete(self): - del self.cache[self.id] + self.cache.pop(self.id, None) def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" @@ -280,13 +381,16 @@ class RamSession(Session): class FileSession(Session): """Implementation of the File backend for sessions - storage_path: the folder where session data will be saved. Each session + storage_path + The folder where session data will be saved. Each session will be saved as pickle.dump(data, expiration_time) in its own file; the filename will be self.SESSION_PREFIX + self.id. + """ SESSION_PREFIX = 'session-' LOCK_SUFFIX = '.lock' + pickle_protocol = pickle.HIGHEST_PROTOCOL def __init__(self, id=None, **kwargs): # The 'storage_path' arg is required for file-based sessions. @@ -302,7 +406,7 @@ class FileSession(Session): # The 'storage_path' arg is required for file-based sessions. kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(cls, k, v) # Warn if any lock files exist at startup. @@ -342,7 +446,7 @@ class FileSession(Session): def _save(self, expiration_time): f = open(self._get_file_path(), "wb") try: - pickle.dump((self._data, expiration_time), f) + pickle.dump((self._data, expiration_time), f, self.pickle_protocol) finally: f.close() @@ -376,7 +480,7 @@ class FileSession(Session): def clean_up(self): """Clean up expired sessions.""" - now = datetime.datetime.now() + now = self.now() # Iterate over all session files in self.storage_path for fname in os.listdir(self.storage_path): if (fname.startswith(self.SESSION_PREFIX) @@ -405,7 +509,7 @@ class FileSession(Session): class PostgresqlSession(Session): """ Implementation of the PostgreSQL backend for sessions. It assumes - a table like this: + a table like this:: create table session ( id varchar(40), @@ -416,6 +520,8 @@ class PostgresqlSession(Session): You must provide your own get_db function. """ + pickle_protocol = pickle.HIGHEST_PROTOCOL + def __init__(self, id=None, **kwargs): Session.__init__(self, id, **kwargs) self.cursor = self.db.cursor() @@ -426,10 +532,10 @@ class PostgresqlSession(Session): This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(cls, k, v) - self.db = self.get_db() + cls.db = cls.get_db() setup = classmethod(setup) def __del__(self): @@ -457,7 +563,7 @@ class PostgresqlSession(Session): return data, expiration_time def _save(self, expiration_time): - pickled_data = pickle.dumps(self._data) + pickled_data = pickle.dumps(self._data, self.pickle_protocol) self.cursor.execute('update session set data = %s, ' 'expiration_time = %s where id = %s', (pickled_data, expiration_time, self.id)) @@ -482,7 +588,7 @@ class PostgresqlSession(Session): def clean_up(self): """Clean up expired sessions.""" self.cursor.execute('delete from session where expiration_time < %s', - (datetime.datetime.now(),)) + (self.now(),)) class MemcachedSession(Session): @@ -502,13 +608,26 @@ class MemcachedSession(Session): This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(cls, k, v) import memcache cls.cache = memcache.Client(cls.servers) setup = classmethod(setup) + def _get_id(self): + return self._id + def _set_id(self, value): + # This encode() call is where we differ from the superclass. + # Memcache keys MUST be byte strings, not unicode. + if isinstance(value, unicodestr): + value = value.encode('utf-8') + + self._id = value + for o in self.id_observers: + o(value) + id = property(_get_id, _set_id, doc="The current session ID.") + def _exists(self): self.mc_lock.acquire() try: @@ -558,21 +677,23 @@ def save(): if not hasattr(cherrypy.serving, "session"): return + request = cherrypy.serving.request + response = cherrypy.serving.response # Guard against running twice - if hasattr(cherrypy.request, "_sessionsaved"): + if hasattr(request, "_sessionsaved"): return - cherrypy.request._sessionsaved = True + request._sessionsaved = True - if cherrypy.response.stream: + if response.stream: # If the body is being streamed, we have to save the data # *after* the response has been written out - cherrypy.request.hooks.attach('on_end_request', cherrypy.session.save) + request.hooks.attach('on_end_request', cherrypy.session.save) else: # If the body is not being streamed, we save the data now # (so we can release the lock). - if isinstance(cherrypy.response.body, types.GeneratorType): - cherrypy.response.collapse_body() + if isinstance(response.body, types.GeneratorType): + response.collapse_body() cherrypy.session.save() save.failsafe = True @@ -587,29 +708,56 @@ close.priority = 90 def init(storage_type='ram', path=None, path_header=None, name='session_id', - timeout=60, domain=None, secure=False, clean_freq=5, **kwargs): + timeout=60, domain=None, secure=False, clean_freq=5, + persistent=True, httponly=False, debug=False, **kwargs): """Initialize session object (using cookies). - storage_type: one of 'ram', 'file', 'postgresql'. This will be used - to look up the corresponding class in cherrypy.lib.sessions + storage_type + One of 'ram', 'file', 'postgresql', 'memcached'. This will be + used to look up the corresponding class in cherrypy.lib.sessions globals. For example, 'file' will use the FileSession class. - path: the 'path' value to stick in the response cookie metadata. - path_header: if 'path' is None (the default), then the response + + path + The 'path' value to stick in the response cookie metadata. + + path_header + If 'path' is None (the default), then the response cookie 'path' will be pulled from request.headers[path_header]. - name: the name of the cookie. - timeout: the expiration timeout (in minutes) for both the cookie and - stored session data. - domain: the cookie domain. - secure: if False (the default) the cookie 'secure' value will not + + name + The name of the cookie. + + timeout + The expiration timeout (in minutes) for the stored session data. + If 'persistent' is True (the default), this is also the timeout + for the cookie. + + domain + The cookie domain. + + secure + If False (the default) the cookie 'secure' value will not be set. If True, the cookie 'secure' value will be set (to 1). - clean_freq (minutes): the poll rate for expired session cleanup. + + clean_freq (minutes) + The poll rate for expired session cleanup. + + persistent + If True (the default), the 'timeout' argument will be used + to expire the cookie. If False, the cookie will not have an expiry, + and the cookie will be a "session cookie" which expires when the + browser is closed. + + httponly + If False (the default) the cookie 'httponly' value will not be set. + If True, the cookie 'httponly' value will be set (to 1). Any additional kwargs will be bound to the new Session instance, and may be specific to the storage type. See the subclass of Session you're using for more information. """ - request = cherrypy.request + request = cherrypy.serving.request # Guard against running twice if hasattr(request, "_session_init_flag"): @@ -620,6 +768,9 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', id = None if name in request.cookie: id = request.cookie[name].value + if debug: + cherrypy.log('ID obtained from request.cookie: %r' % id, + 'TOOLS.SESSIONS') # Find the storage class and call setup (first time only). storage_class = storage_type.title() + 'Session' @@ -634,36 +785,62 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', kwargs['timeout'] = timeout kwargs['clean_freq'] = clean_freq cherrypy.serving.session = sess = storage_class(id, **kwargs) + sess.debug = debug def update_cookie(id): """Update the cookie every time the session id changes.""" - cherrypy.response.cookie[name] = id + cherrypy.serving.response.cookie[name] = id sess.id_observers.append(update_cookie) # Create cherrypy.session which will proxy to cherrypy.serving.session if not hasattr(cherrypy, "session"): cherrypy.session = cherrypy._ThreadLocalProxy('session') + if persistent: + cookie_timeout = timeout + else: + # See http://support.microsoft.com/kb/223799/EN-US/ + # and http://support.mozilla.com/en-US/kb/Cookies + cookie_timeout = None set_response_cookie(path=path, path_header=path_header, name=name, - timeout=timeout, domain=domain, secure=secure) + timeout=cookie_timeout, domain=domain, secure=secure, + httponly=httponly) def set_response_cookie(path=None, path_header=None, name='session_id', - timeout=60, domain=None, secure=False): + timeout=60, domain=None, secure=False, httponly=False): """Set a response cookie for the client. - path: the 'path' value to stick in the response cookie metadata. - path_header: if 'path' is None (the default), then the response + path + the 'path' value to stick in the response cookie metadata. + + path_header + if 'path' is None (the default), then the response cookie 'path' will be pulled from request.headers[path_header]. - name: the name of the cookie. - timeout: the expiration timeout for the cookie. - domain: the cookie domain. - secure: if False (the default) the cookie 'secure' value will not + + name + the name of the cookie. + + timeout + the expiration timeout for the cookie. If 0 or other boolean + False, no 'expires' param will be set, and the cookie will be a + "session cookie" which expires when the browser is closed. + + domain + the cookie domain. + + secure + if False (the default) the cookie 'secure' value will not be set. If True, the cookie 'secure' value will be set (to 1). + + httponly + If False (the default) the cookie 'httponly' value will not be set. + If True, the cookie 'httponly' value will be set (to 1). + """ # Set response cookie - cookie = cherrypy.response.cookie + cookie = cherrypy.serving.response.cookie cookie[name] = cherrypy.serving.session.id - cookie[name]['path'] = (path or cherrypy.request.headers.get(path_header) + cookie[name]['path'] = (path or cherrypy.serving.request.headers.get(path_header) or '/') # We'd like to use the "max-age" param as indicated in @@ -673,19 +850,22 @@ def set_response_cookie(path=None, path_header=None, name='session_id', ## cookie[name]['max-age'] = timeout * 60 if False and timeout: # Changed by Kovid, we want the user to have to # re-authenticate on browser restart - cookie[name]['expires'] = http.HTTPDate(time.time() + timeout) + e = time.time() + (timeout * 60) + cookie[name]['expires'] = httputil.HTTPDate(e) if domain is not None: cookie[name]['domain'] = domain if secure: cookie[name]['secure'] = 1 - + if httponly: + if not cookie[name].isReservedKey('httponly'): + raise ValueError("The httponly cookie token is not supported.") + cookie[name]['httponly'] = 1 def expire(): """Expire the current session cookie.""" - name = cherrypy.request.config.get('tools.sessions.name', 'session_id') + name = cherrypy.serving.request.config.get('tools.sessions.name', 'session_id') one_year = 60 * 60 * 24 * 365 - exp = time.gmtime(time.time() - one_year) - t = time.strftime("%a, %d-%b-%Y %H:%M:%S GMT", exp) - cherrypy.response.cookie[name]['expires'] = t + e = time.time() - one_year + cherrypy.serving.response.cookie[name]['expires'] = httputil.HTTPDate(e) diff --git a/src/cherrypy/lib/static.py b/src/cherrypy/lib/static.py index 2a5a9f6829..2d1423071b 100644 --- a/src/cherrypy/lib/static.py +++ b/src/cherrypy/lib/static.py @@ -1,20 +1,27 @@ +try: + from io import UnsupportedOperation +except ImportError: + UnsupportedOperation = object() +import logging import mimetypes mimetypes.init() mimetypes.types_map['.dwg']='image/x-dwg' mimetypes.types_map['.ico']='image/x-icon' +mimetypes.types_map['.bz2']='application/x-bzip2' +mimetypes.types_map['.gz']='application/x-gzip' import os import re import stat import time -import urllib import cherrypy -from cherrypy.lib import cptools, http, file_generator_limited +from cherrypy._cpcompat import ntob, unquote +from cherrypy.lib import cptools, httputil, file_generator_limited -def serve_file(path, content_type=None, disposition=None, name=None): - """Set status, headers, and body in order to serve the given file. +def serve_file(path, content_type=None, disposition=None, name=None, debug=False): + """Set status, headers, and body in order to serve the given path. The Content-Type header will be set to the content_type arg, if provided. If not provided, the Content-Type will be guessed by the file extension @@ -26,29 +33,36 @@ def serve_file(path, content_type=None, disposition=None, name=None): header will be written. """ - response = cherrypy.response + response = cherrypy.serving.response # If path is relative, users should fix it by making path absolute. # That is, CherryPy should not guess where the application root is. # It certainly should *not* use cwd (since CP may be invoked from a - # variety of paths). If using tools.static, you can make your relative - # paths become absolute by supplying a value for "tools.static.root". + # variety of paths). If using tools.staticdir, you can make your relative + # paths become absolute by supplying a value for "tools.staticdir.root". if not os.path.isabs(path): - raise ValueError("'%s' is not an absolute path." % path) + msg = "'%s' is not an absolute path." % path + if debug: + cherrypy.log(msg, 'TOOLS.STATICFILE') + raise ValueError(msg) try: st = os.stat(path) except OSError: + if debug: + cherrypy.log('os.stat(%r) failed' % path, 'TOOLS.STATIC') raise cherrypy.NotFound() # Check if path is a directory. if stat.S_ISDIR(st.st_mode): # Let the caller deal with it as they like. + if debug: + cherrypy.log('%r is a directory' % path, 'TOOLS.STATIC') raise cherrypy.NotFound() # Set the Last-Modified response header, so that # modified-since validation code can work. - response.headers['Last-Modified'] = http.HTTPDate(st.st_mtime) + response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) cptools.validate_since() if content_type is None: @@ -57,77 +71,159 @@ def serve_file(path, content_type=None, disposition=None, name=None): i = path.rfind('.') if i != -1: ext = path[i:].lower() - content_type = mimetypes.types_map.get(ext, "text/plain") - response.headers['Content-Type'] = content_type + content_type = mimetypes.types_map.get(ext, None) + if content_type is not None: + response.headers['Content-Type'] = content_type + if debug: + cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') + cd = None if disposition is not None: if name is None: name = os.path.basename(path) cd = '%s; filename="%s"' % (disposition, name) response.headers["Content-Disposition"] = cd + if debug: + cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') # Set Content-Length and use an iterable (file object) # this way CP won't load the whole file in memory - c_len = st.st_size - bodyfile = open(path, 'rb') + content_length = st.st_size + fileobj = open(path, 'rb') + return _serve_fileobj(fileobj, content_type, content_length, debug=debug) + +def serve_fileobj(fileobj, content_type=None, disposition=None, name=None, + debug=False): + """Set status, headers, and body in order to serve the given file object. + + The Content-Type header will be set to the content_type arg, if provided. + + If disposition is not None, the Content-Disposition header will be set + to "; filename=". If name is None, 'filename' will + not be set. If disposition is None, no Content-Disposition header will + be written. + + CAUTION: If the request contains a 'Range' header, one or more seek()s will + be performed on the file object. This may cause undesired behavior if + the file object is not seekable. It could also produce undesired results + if the caller set the read position of the file object prior to calling + serve_fileobj(), expecting that the data would be served starting from that + position. + """ + + response = cherrypy.serving.response + + try: + st = os.fstat(fileobj.fileno()) + except AttributeError: + if debug: + cherrypy.log('os has no fstat attribute', 'TOOLS.STATIC') + content_length = None + except UnsupportedOperation: + content_length = None + else: + # Set the Last-Modified response header, so that + # modified-since validation code can work. + response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) + cptools.validate_since() + content_length = st.st_size + + if content_type is not None: + response.headers['Content-Type'] = content_type + if debug: + cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') + + cd = None + if disposition is not None: + if name is None: + cd = disposition + else: + cd = '%s; filename="%s"' % (disposition, name) + response.headers["Content-Disposition"] = cd + if debug: + cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') + + return _serve_fileobj(fileobj, content_type, content_length, debug=debug) + +def _serve_fileobj(fileobj, content_type, content_length, debug=False): + """Internal. Set response.body to the given file object, perhaps ranged.""" + response = cherrypy.serving.response # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code - if cherrypy.request.protocol >= (1, 1): + request = cherrypy.serving.request + if request.protocol >= (1, 1): response.headers["Accept-Ranges"] = "bytes" - r = http.get_ranges(cherrypy.request.headers.get('Range'), c_len) + r = httputil.get_ranges(request.headers.get('Range'), content_length) if r == []: - response.headers['Content-Range'] = "bytes */%s" % c_len + response.headers['Content-Range'] = "bytes */%s" % content_length message = "Invalid Range (first-byte-pos greater than Content-Length)" + if debug: + cherrypy.log(message, 'TOOLS.STATIC') raise cherrypy.HTTPError(416, message) + if r: if len(r) == 1: # Return a single-part response. start, stop = r[0] - if stop > c_len: - stop = c_len + if stop > content_length: + stop = content_length r_len = stop - start + if debug: + cherrypy.log('Single part; start: %r, stop: %r' % (start, stop), + 'TOOLS.STATIC') response.status = "206 Partial Content" - response.headers['Content-Range'] = ("bytes %s-%s/%s" % - (start, stop - 1, c_len)) + response.headers['Content-Range'] = ( + "bytes %s-%s/%s" % (start, stop - 1, content_length)) response.headers['Content-Length'] = r_len - bodyfile.seek(start) - response.body = file_generator_limited(bodyfile, r_len) + fileobj.seek(start) + response.body = file_generator_limited(fileobj, r_len) else: # Return a multipart/byteranges response. response.status = "206 Partial Content" - import mimetools - boundary = mimetools.choose_boundary() + try: + # Python 3 + from email.generator import _make_boundary as choose_boundary + except ImportError: + # Python 2 + from mimetools import choose_boundary + boundary = choose_boundary() ct = "multipart/byteranges; boundary=%s" % boundary response.headers['Content-Type'] = ct - if response.headers.has_key("Content-Length"): + if "Content-Length" in response.headers: # Delete Content-Length header so finalize() recalcs it. del response.headers["Content-Length"] def file_ranges(): # Apache compatibility: - yield "\r\n" + yield ntob("\r\n") for start, stop in r: - yield "--" + boundary - yield "\r\nContent-type: %s" % content_type - yield ("\r\nContent-range: bytes %s-%s/%s\r\n\r\n" - % (start, stop - 1, c_len)) - bodyfile.seek(start) - for chunk in file_generator_limited(bodyfile, stop-start): + if debug: + cherrypy.log('Multipart; start: %r, stop: %r' % (start, stop), + 'TOOLS.STATIC') + yield ntob("--" + boundary, 'ascii') + yield ntob("\r\nContent-type: %s" % content_type, 'ascii') + yield ntob("\r\nContent-range: bytes %s-%s/%s\r\n\r\n" + % (start, stop - 1, content_length), 'ascii') + fileobj.seek(start) + for chunk in file_generator_limited(fileobj, stop-start): yield chunk - yield "\r\n" + yield ntob("\r\n") # Final boundary - yield "--" + boundary + "--" + yield ntob("--" + boundary + "--", 'ascii') # Apache compatibility: - yield "\r\n" + yield ntob("\r\n") response.body = file_ranges() + return response.body else: - response.headers['Content-Length'] = c_len - response.body = bodyfile - else: - response.headers['Content-Length'] = c_len - response.body = bodyfile + if debug: + cherrypy.log('No byteranges requested', 'TOOLS.STATIC') + + # Set Content-Length and use an iterable (file object) + # this way CP won't load the whole file in memory + response.headers['Content-Length'] = content_length + response.body = fileobj return response.body def serve_download(path, name=None): @@ -136,7 +232,10 @@ def serve_download(path, name=None): return serve_file(path, "application/x-download", "attachment", name) -def _attempt(filename, content_types): +def _attempt(filename, content_types, debug=False): + if debug: + cherrypy.log('Attempting %r (content_types %r)' % + (filename, content_types), 'TOOLS.STATICDIR') try: # you can set the content types for a # complete directory per extension @@ -144,33 +243,45 @@ def _attempt(filename, content_types): if content_types: r, ext = os.path.splitext(filename) content_type = content_types.get(ext[1:], None) - serve_file(filename, content_type=content_type) + serve_file(filename, content_type=content_type, debug=debug) return True except cherrypy.NotFound: # If we didn't find the static file, continue handling the # request. We might find a dynamic handler instead. + if debug: + cherrypy.log('NotFound', 'TOOLS.STATICFILE') return False -def staticdir(section, dir, root="", match="", content_types=None, index=""): +def staticdir(section, dir, root="", match="", content_types=None, index="", + debug=False): """Serve a static resource from the given (root +) dir. - If 'match' is given, request.path_info will be searched for the given - regular expression before attempting to serve static content. + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. - If content_types is given, it should be a Python dictionary of - {file-extension: content-type} pairs, where 'file-extension' is - a string (e.g. "gif") and 'content-type' is the value to write - out in the Content-Type response header (e.g. "image/gif"). + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). - If 'index' is provided, it should be the (relative) name of a file to - serve for directory requests. For example, if the dir argument is - '/home/me', the Request-URI is 'myapp', and the index arg is - 'index.html', the file '/home/me/myapp/index.html' will be sought. + index + If provided, it should be the (relative) name of a file to + serve for directory requests. For example, if the dir argument is + '/home/me', the Request-URI is 'myapp', and the index arg is + 'index.html', the file '/home/me/myapp/index.html' will be sought. """ - if cherrypy.request.method not in ('GET', 'HEAD'): + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICDIR') return False - if match and not re.search(match, cherrypy.request.path_info): + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICDIR') return False # Allow the use of '~' to refer to a user's home directory. @@ -180,6 +291,8 @@ def staticdir(section, dir, root="", match="", content_types=None, index=""): if not os.path.isabs(dir): if not root: msg = "Static dir requires an absolute dir (or root)." + if debug: + cherrypy.log(msg, 'TOOLS.STATICDIR') raise ValueError(msg) dir = os.path.join(root, dir) @@ -188,11 +301,14 @@ def staticdir(section, dir, root="", match="", content_types=None, index=""): if section == 'global': section = "/" section = section.rstrip(r"\/") - branch = cherrypy.request.path_info[len(section) + 1:] - branch = urllib.unquote(branch.lstrip(r"\/")) + branch = request.path_info[len(section) + 1:] + branch = unquote(branch.lstrip(r"\/")) # If branch is "", filename will end in a slash filename = os.path.join(dir, branch) + if debug: + cherrypy.log('Checking file %r to fulfill %r' % + (filename, request.path_info), 'TOOLS.STATICDIR') # There's a chance that the branch pulled from the URL might # have ".." or similar uplevel attacks in it. Check that the final @@ -206,31 +322,42 @@ def staticdir(section, dir, root="", match="", content_types=None, index=""): if index: handled = _attempt(os.path.join(filename, index), content_types) if handled: - cherrypy.request.is_index = filename[-1] in (r"\/") + request.is_index = filename[-1] in (r"\/") return handled -def staticfile(filename, root=None, match="", content_types=None): +def staticfile(filename, root=None, match="", content_types=None, debug=False): """Serve a static resource from the given (root +) filename. - If 'match' is given, request.path_info will be searched for the given - regular expression before attempting to serve static content. + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). - If content_types is given, it should be a Python dictionary of - {file-extension: content-type} pairs, where 'file-extension' is - a string (e.g. "gif") and 'content-type' is the value to write - out in the Content-Type response header (e.g. "image/gif"). """ - if cherrypy.request.method not in ('GET', 'HEAD'): + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICFILE') return False - if match and not re.search(match, cherrypy.request.path_info): + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICFILE') return False # If filename is relative, make absolute using "root". if not os.path.isabs(filename): if not root: msg = "Static tool requires an absolute filename (got '%s')." % filename + if debug: + cherrypy.log(msg, 'TOOLS.STATICFILE') raise ValueError(msg) filename = os.path.join(root, filename) - return _attempt(filename, content_types) + return _attempt(filename, content_types, debug=debug) diff --git a/src/cherrypy/lib/tidy.py b/src/cherrypy/lib/tidy.py deleted file mode 100644 index ed337c6a1e..0000000000 --- a/src/cherrypy/lib/tidy.py +++ /dev/null @@ -1,184 +0,0 @@ -"""Functions to run cherrypy.response through Tidy or NSGML.""" - -import cgi -import os -import StringIO -import traceback - -import cherrypy - -def tidy(temp_dir, tidy_path, strict_xml=False, errors_to_ignore=None, - indent=False, wrap=False, warnings=True): - """Run cherrypy.response through Tidy. - - If either 'indent' or 'wrap' are specified, then response.body will be - set to the output of tidy. Otherwise, only errors (including warnings, - if warnings is True) will change the body. - - Note that we use the standalone Tidy tool rather than the python - mxTidy module. This is because this module does not seem to be - stable and it crashes on some HTML pages (which means that the - server would also crash) - """ - response = cherrypy.response - - # the tidy tool, by its very nature it's not generator friendly, - # so we just collapse the body and work with it. - orig_body = response.collapse_body() - - fct = response.headers.get('Content-Type', '') - ct = fct.split(';')[0] - encoding = '' - i = fct.find('charset=') - if i != -1: - encoding = fct[i + 8:] - - if ct == 'text/html': - page_file = os.path.join(temp_dir, 'page.html') - open(page_file, 'wb').write(orig_body) - - out_file = os.path.join(temp_dir, 'tidy.out') - err_file = os.path.join(temp_dir, 'tidy.err') - tidy_enc = encoding.replace('-', '') - if tidy_enc: - tidy_enc = '-' + tidy_enc - - strict_xml = ("", " -xml")[bool(strict_xml)] - - if indent: - indent = ' -indent' - else: - indent = '' - - if wrap is False: - wrap = '' - else: - try: - wrap = ' -wrap %d' % int(tidyWrap) - except: - wrap = '' - - result = os.system('"%s" %s%s%s%s -f %s -o %s %s' % - (tidy_path, tidy_enc, strict_xml, indent, wrap, - err_file, out_file, page_file)) - use_output = bool(indent or wrap) and not result - if use_output: - output = open(out_file, 'rb').read() - - new_errs = [] - for err in open(err_file, 'rb').read().splitlines(): - if (err.find('Error') != -1 or - (warnings and err.find('Warning') != -1)): - ignore = 0 - for err_ign in errors_to_ignore or []: - if err.find(err_ign) != -1: - ignore = 1 - break - if not ignore: - new_errs.append(err) - - if new_errs: - response.body = wrong_content('
'.join(new_errs), orig_body) - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - return - elif strict_xml: - # The HTML is OK, but is it valid XML? - # Use elementtree to parse XML - from elementtree.ElementTree import parse - tag_list = ['nbsp', 'quot'] - for tag in tag_list: - orig_body = orig_body.replace('&' + tag + ';', tag.upper()) - - if encoding: - enctag = '' % encoding - orig_body = enctag + orig_body - - f = StringIO.StringIO(orig_body) - try: - tree = parse(f) - except: - # Wrong XML - body_file = StringIO.StringIO() - traceback.print_exc(file = body_file) - body_file = '
'.join(body_file.getvalue()) - response.body = wrong_content(body_file, orig_body, "XML") - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - return - - if use_output: - response.body = [output] - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - -def html_space(text): - """Escape text, replacing space with nbsp and tab with 4 nbsp's.""" - return cgi.escape(text).replace('\t', ' ').replace(' ', ' ') - -def html_break(text): - """Escape text, replacing newline with HTML br element.""" - return cgi.escape(text).replace('\n', '
') - -def wrong_content(header, body, content_type="HTML"): - output = ["Wrong %s:
%s
" % (content_type, html_break(header))] - for i, line in enumerate(body.splitlines()): - output.append("%03d - %s" % (i + 1, html_space(line))) - return "
".join(output) - - -def nsgmls(temp_dir, nsgmls_path, catalog_path, errors_to_ignore=None): - response = cherrypy.response - - # the tidy tool, by its very nature it's not generator friendly, - # so we just collect the body and work with it. - orig_body = response.collapse_body() - - fct = response.headers.get('Content-Type', '') - ct = fct.split(';')[0] - encoding = '' - i = fct.find('charset=') - if i != -1: - encoding = fct[i + 8:] - if ct == 'text/html': - # Remove bits of Javascript (nsgmls doesn't seem to handle - # them correctly (for instance, if ', i) - if j == -1: - break - orig_body = orig_body[:i] + orig_body[j+9:] - - page_file = os.path.join(temp_dir, 'page.html') - open(page_file, 'wb').write(orig_body) - - err_file = os.path.join(temp_dir, 'nsgmls.err') - command = ('%s -c%s -f%s -s -E10 %s' % - (nsgmls_path, catalog_path, err_file, page_file)) - command = command.replace('\\', '/') - os.system(command) - errs = open(err_file, 'rb').read() - - new_errs = [] - for err in errs.splitlines(): - ignore = False - for err_ign in errors_to_ignore or []: - if err.find(err_ign) != -1: - ignore = True - break - if not ignore: - new_errs.append(err) - - if new_errs: - response.body = wrong_content('
'.join(new_errs), orig_body) - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - diff --git a/src/cherrypy/lib/wsgiapp.py b/src/cherrypy/lib/wsgiapp.py deleted file mode 100644 index 8aeb5755d9..0000000000 --- a/src/cherrypy/lib/wsgiapp.py +++ /dev/null @@ -1,77 +0,0 @@ -"""A CherryPy tool for hosting a foreign WSGI application.""" - -import sys -import warnings - -import cherrypy - - -# is this sufficient for start_response? -def start_response(status, response_headers, exc_info=None): - cherrypy.response.status = status - headers_dict = dict(response_headers) - cherrypy.response.headers.update(headers_dict) - -def make_environ(): - """grabbed some of below from wsgiserver.py - - for hosting WSGI apps in non-WSGI environments (yikes!) - """ - - request = cherrypy.request - - # create and populate the wsgi environ - environ = dict() - environ["wsgi.version"] = (1,0) - environ["wsgi.url_scheme"] = request.scheme - environ["wsgi.input"] = request.rfile - environ["wsgi.errors"] = sys.stderr - environ["wsgi.multithread"] = True - environ["wsgi.multiprocess"] = False - environ["wsgi.run_once"] = False - environ["REQUEST_METHOD"] = request.method - environ["SCRIPT_NAME"] = request.script_name - environ["PATH_INFO"] = request.path_info - environ["QUERY_STRING"] = request.query_string - environ["SERVER_PROTOCOL"] = request.protocol - environ["SERVER_NAME"] = request.local.name - environ["SERVER_PORT"] = request.local.port - environ["REMOTE_HOST"] = request.remote.name - environ["REMOTE_ADDR"] = request.remote.ip - environ["REMOTE_PORT"] = request.remote.port - # then all the http headers - headers = request.headers - environ["CONTENT_TYPE"] = headers.get("Content-type", "") - environ["CONTENT_LENGTH"] = headers.get("Content-length", "") - for (k, v) in headers.iteritems(): - envname = "HTTP_" + k.upper().replace("-","_") - environ[envname] = v - return environ - - -def run(app, env=None): - """Run the given WSGI app and set response.body to its output.""" - warnings.warn("This module is deprecated and will be removed in " - "Cherrypy 3.2. See http://www.cherrypy.org/ticket/700 " - "for more information.") - - try: - environ = cherrypy.request.wsgi_environ.copy() - environ['SCRIPT_NAME'] = cherrypy.request.script_name - environ['PATH_INFO'] = cherrypy.request.path_info - except AttributeError: - environ = make_environ() - - if env: - environ.update(env) - - # run the wsgi app and have it set response.body - response = app(environ, start_response) - try: - cherrypy.response.body = [x for x in response] - finally: - if hasattr(response, "close"): - response.close() - - return True - diff --git a/src/cherrypy/lib/xmlrpc.py b/src/cherrypy/lib/xmlrpcutil.py similarity index 78% rename from src/cherrypy/lib/xmlrpc.py rename to src/cherrypy/lib/xmlrpcutil.py index 59ee0278fe..9a44464bc0 100644 --- a/src/cherrypy/lib/xmlrpc.py +++ b/src/cherrypy/lib/xmlrpcutil.py @@ -1,13 +1,19 @@ import sys import cherrypy +from cherrypy._cpcompat import ntob +def get_xmlrpclib(): + try: + import xmlrpc.client as x + except ImportError: + import xmlrpclib as x + return x def process_body(): """Return (params, method) from request body.""" try: - import xmlrpclib - return xmlrpclib.loads(cherrypy.request.body.read()) + return get_xmlrpclib().loads(cherrypy.request.body.read()) except Exception: return ('ERROR PARAMS', ), 'ERRORMETHOD' @@ -29,13 +35,13 @@ def _set_response(body): # as a "Protocol Error", we'll just return 200 every time. response = cherrypy.response response.status = '200 OK' - response.body = body + response.body = ntob(body, 'utf-8') response.headers['Content-Type'] = 'text/xml' response.headers['Content-Length'] = len(body) def respond(body, encoding='utf-8', allow_none=0): - import xmlrpclib + xmlrpclib = get_xmlrpclib() if not isinstance(body, xmlrpclib.Fault): body = (body,) _set_response(xmlrpclib.dumps(body, methodresponse=1, @@ -44,6 +50,6 @@ def respond(body, encoding='utf-8', allow_none=0): def on_error(*args, **kwargs): body = str(sys.exc_info()[1]) - import xmlrpclib + xmlrpclib = get_xmlrpclib() _set_response(xmlrpclib.dumps(xmlrpclib.Fault(1, body))) diff --git a/src/cherrypy/process/plugins.py b/src/cherrypy/process/plugins.py index 0e8b4bf919..ba618a0bd0 100644 --- a/src/cherrypy/process/plugins.py +++ b/src/cherrypy/process/plugins.py @@ -2,19 +2,38 @@ import os import re -try: - set -except NameError: - from sets import Set as set import signal as _signal import sys import time import threading +from cherrypy._cpcompat import basestring, get_daemon, get_thread_ident, ntob, set + +# _module__file__base is used by Autoreload to make +# absolute any filenames retrieved from sys.modules which are not +# already absolute paths. This is to work around Python's quirk +# of importing the startup script and using a relative filename +# for it in sys.modules. +# +# Autoreload examines sys.modules afresh every time it runs. If an application +# changes the current directory by executing os.chdir(), then the next time +# Autoreload runs, it will not be able to find any filenames which are +# not absolute paths, because the current directory is not the same as when the +# module was first imported. Autoreload will then wrongly conclude the file has +# "changed", and initiate the shutdown/re-exec sequence. +# See ticket #917. +# For this workaround to have a decent probability of success, this module +# needs to be imported as early as possible, before the app has much chance +# to change the working directory. +_module__file__base = os.getcwd() + class SimplePlugin(object): """Plugin base class which auto-subscribes methods for known channels.""" + bus = None + """A :class:`Bus `, usually cherrypy.engine.""" + def __init__(self, bus): self.bus = bus @@ -39,16 +58,33 @@ class SimplePlugin(object): class SignalHandler(object): """Register bus channels (and listeners) for system signals. - By default, instantiating this object subscribes the following signals - and listeners: + You can modify what signals your application listens for, and what it does + when it receives signals, by modifying :attr:`SignalHandler.handlers`, + a dict of {signal name: callback} pairs. The default set is:: - TERM: bus.exit - HUP : bus.restart - USR1: bus.graceful + handlers = {'SIGTERM': self.bus.exit, + 'SIGHUP': self.handle_SIGHUP, + 'SIGUSR1': self.bus.graceful, + } + + The :func:`SignalHandler.handle_SIGHUP`` method calls + :func:`bus.restart()` + if the process is daemonized, but + :func:`bus.exit()` + if the process is attached to a TTY. This is because Unix window + managers tend to send SIGHUP to terminal windows when the user closes them. + + Feel free to add signals which are not available on every platform. The + :class:`SignalHandler` will ignore errors raised from attempting to register + handlers for unknown signals. """ - # Map from signal numbers to names + handlers = {} + """A map from signal names (e.g. 'SIGTERM') to handlers (e.g. bus.exit).""" + signals = {} + """A map from signal numbers to names.""" + for k, v in vars(_signal).items(): if k.startswith('SIG') and not k.startswith('SIG_'): signals[v] = k @@ -61,18 +97,32 @@ class SignalHandler(object): 'SIGHUP': self.handle_SIGHUP, 'SIGUSR1': self.bus.graceful, } - + + if sys.platform[:4] == 'java': + del self.handlers['SIGUSR1'] + self.handlers['SIGUSR2'] = self.bus.graceful + self.bus.log("SIGUSR1 cannot be set on the JVM platform. " + "Using SIGUSR2 instead.") + self.handlers['SIGINT'] = self._jython_SIGINT_handler + self._previous_handlers = {} + def _jython_SIGINT_handler(self, signum=None, frame=None): + # See http://bugs.jython.org/issue1313 + self.bus.log('Keyboard Interrupt: shutting down bus') + self.bus.exit() + def subscribe(self): - for sig, func in self.handlers.iteritems(): + """Subscribe self.handlers to signals.""" + for sig, func in self.handlers.items(): try: self.set_handler(sig, func) except ValueError: pass def unsubscribe(self): - for signum, handler in self._previous_handlers.iteritems(): + """Unsubscribe self.handlers from signals.""" + for signum, handler in self._previous_handlers.items(): signame = self.signals[signum] if handler is None: @@ -126,6 +176,7 @@ class SignalHandler(object): self.bus.publish(signame) def handle_SIGHUP(self): + """Restart if daemonized, else exit.""" if os.isatty(sys.stdin.fileno()): # not daemonized (may be foreground or background) self.bus.log("SIGHUP caught but not daemonized. Exiting.") @@ -165,7 +216,8 @@ class DropPrivileges(SimplePlugin): elif isinstance(val, basestring): val = pwd.getpwnam(val)[2] self._uid = val - uid = property(_get_uid, _set_uid, doc="The uid under which to run.") + uid = property(_get_uid, _set_uid, + doc="The uid under which to run. Availability: Unix.") def _get_gid(self): return self._gid @@ -178,7 +230,8 @@ class DropPrivileges(SimplePlugin): elif isinstance(val, basestring): val = grp.getgrnam(val)[2] self._gid = val - gid = property(_get_gid, _set_gid, doc="The gid under which to run.") + gid = property(_get_gid, _set_gid, + doc="The gid under which to run. Availability: Unix.") def _get_umask(self): return self._umask @@ -191,7 +244,12 @@ class DropPrivileges(SimplePlugin): level=30) val = None self._umask = val - umask = property(_get_umask, _set_umask, doc="The umask under which to run.") + umask = property(_get_umask, _set_umask, + doc="""The default permission mode for newly created files and directories. + + Usually expressed in octal format, for example, ``0644``. + Availability: Unix, Windows. + """) def start(self): # uid/gid @@ -216,6 +274,7 @@ class DropPrivileges(SimplePlugin): self.bus.log('Started as uid: %r gid: %r' % current_ids()) if self.gid is not None: os.setgid(self.gid) + os.setgroups([]) if self.uid is not None: os.setuid(self.uid) self.bus.log('Running as uid: %r gid: %r' % current_ids()) @@ -242,8 +301,8 @@ class DropPrivileges(SimplePlugin): class Daemonizer(SimplePlugin): """Daemonize the running script. - Use this with a Web Site Process Bus via: - + Use this with a Web Site Process Bus via:: + Daemonizer(bus).subscribe() When this component finishes, the process is completely decoupled from @@ -296,8 +355,9 @@ class Daemonizer(SimplePlugin): # This is the first parent. Exit, now that we've forked. self.bus.log('Forking once.') os._exit(0) - except OSError, exc: + except OSError: # Python raises OSError rather than returning negative numbers. + exc = sys.exc_info()[1] sys.exit("%s: fork #1 failed: (%d) %s\n" % (sys.argv[0], exc.errno, exc.strerror)) @@ -309,7 +369,8 @@ class Daemonizer(SimplePlugin): if pid > 0: self.bus.log('Forking twice.') os._exit(0) # Exit second parent - except OSError, exc: + except OSError: + exc = sys.exc_info()[1] sys.exit("%s: fork #2 failed: (%d) %s\n" % (sys.argv[0], exc.errno, exc.strerror)) @@ -318,7 +379,7 @@ class Daemonizer(SimplePlugin): si = open(self.stdin, "r") so = open(self.stdout, "a+") - se = open(self.stderr, "a+", 0) + se = open(self.stderr, "a+") # os.dup2(fd, fd2) will close fd2 if necessary, # so we don't explicitly close stdin/out/err. @@ -345,7 +406,7 @@ class PIDFile(SimplePlugin): if self.finalized: self.bus.log('PID %r already written to %r.' % (pid, self.pidfile)) else: - open(self.pidfile, "wb").write(str(pid)) + open(self.pidfile, "wb").write(ntob("%s" % pid, 'utf8')) self.bus.log('PID %r written to %r.' % (pid, self.pidfile)) self.finalized = True start.priority = 70 @@ -361,38 +422,94 @@ class PIDFile(SimplePlugin): class PerpetualTimer(threading._Timer): - """A subclass of threading._Timer whose run() method repeats.""" + """A responsive subclass of threading._Timer whose run() method repeats. + + Use this timer only when you really need a very interruptible timer; + this checks its 'finished' condition up to 20 times a second, which can + results in pretty high CPU usage + """ def run(self): while True: self.finished.wait(self.interval) if self.finished.isSet(): return - self.function(*self.args, **self.kwargs) + try: + self.function(*self.args, **self.kwargs) + except Exception: + self.bus.log("Error in perpetual timer thread function %r." % + self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + +class BackgroundTask(threading.Thread): + """A subclass of threading.Thread whose run() method repeats. + + Use this class for most repeating tasks. It uses time.sleep() to wait + for each interval, which isn't very responsive; that is, even if you call + self.cancel(), you'll have to wait until the sleep() call finishes before + the thread stops. To compensate, it defaults to being daemonic, which means + it won't delay stopping the whole process. + """ + + def __init__(self, interval, function, args=[], kwargs={}, bus=None): + threading.Thread.__init__(self) + self.interval = interval + self.function = function + self.args = args + self.kwargs = kwargs + self.running = False + self.bus = bus + + def cancel(self): + self.running = False + + def run(self): + self.running = True + while self.running: + time.sleep(self.interval) + if not self.running: + return + try: + self.function(*self.args, **self.kwargs) + except Exception: + if self.bus: + self.bus.log("Error in background task thread function %r." + % self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + def _set_daemon(self): + return True class Monitor(SimplePlugin): - """WSPBus listener to periodically run a callback in its own thread. + """WSPBus listener to periodically run a callback in its own thread.""" - bus: a Web Site Process Bus object. - callback: the function to call at intervals. - frequency: the time in seconds between callback runs. - """ + callback = None + """The function to call at intervals.""" frequency = 60 + """The time in seconds between callback runs.""" - def __init__(self, bus, callback, frequency=60): + thread = None + """A :class:`BackgroundTask` thread.""" + + def __init__(self, bus, callback, frequency=60, name=None): SimplePlugin.__init__(self, bus) self.callback = callback self.frequency = frequency self.thread = None + self.name = name def start(self): - """Start our callback in its own perpetual timer thread.""" + """Start our callback in its own background thread.""" if self.frequency > 0: - threadname = self.__class__.__name__ + threadname = self.name or self.__class__.__name__ if self.thread is None: - self.thread = PerpetualTimer(self.frequency, self.callback) + self.thread = BackgroundTask(self.frequency, self.callback, + bus = self.bus) self.thread.setName(threadname) self.thread.start() self.bus.log("Started monitor thread %r." % threadname) @@ -401,28 +518,54 @@ class Monitor(SimplePlugin): start.priority = 70 def stop(self): - """Stop our callback's perpetual timer thread.""" + """Stop our callback's background task thread.""" if self.thread is None: - self.bus.log("No thread running for %s." % self.__class__.__name__) + self.bus.log("No thread running for %s." % self.name or self.__class__.__name__) else: if self.thread is not threading.currentThread(): name = self.thread.getName() self.thread.cancel() - self.thread.join() + if not get_daemon(self.thread): + self.bus.log("Joining %r" % name) + self.thread.join() self.bus.log("Stopped thread %r." % name) self.thread = None def graceful(self): - """Stop the callback's perpetual timer thread and restart it.""" + """Stop the callback's background task thread and restart it.""" self.stop() self.start() class Autoreloader(Monitor): - """Monitor which re-executes the process when files change.""" + """Monitor which re-executes the process when files change. + + This :ref:`plugin` restarts the process (via :func:`os.execv`) + if any of the files it monitors change (or is deleted). By default, the + autoreloader monitors all imported modules; you can add to the + set by adding to ``autoreload.files``:: + + cherrypy.engine.autoreload.files.add(myFile) + + If there are imported files you do *not* wish to monitor, you can adjust the + ``match`` attribute, a regular expression. For example, to stop monitoring + cherrypy itself:: + + cherrypy.engine.autoreload.match = r'^(?!cherrypy).+' + + Like all :class:`Monitor` plugins, + the autoreload plugin takes a ``frequency`` argument. The default is + 1 second; that is, the autoreloader will examine files once each second. + """ + + files = None + """The set of files to poll for modifications.""" frequency = 1 + """The interval in seconds at which to poll for modified files.""" + match = '.*' + """A regular expression by which to match filenames.""" def __init__(self, bus, frequency=1, match='.*'): self.mtimes = {} @@ -431,24 +574,30 @@ class Autoreloader(Monitor): Monitor.__init__(self, bus, self.run, frequency) def start(self): - """Start our own perpetual timer thread for self.run.""" + """Start our own background task thread for self.run.""" if self.thread is None: self.mtimes = {} Monitor.start(self) start.priority = 70 - def run(self): - """Reload the process if registered files have been modified.""" - sysfiles = set() + def sysfiles(self): + """Return a Set of sys.modules filenames to monitor.""" + files = set() for k, m in sys.modules.items(): if re.match(self.match, k): - if hasattr(m, '__loader__'): - if hasattr(m.__loader__, 'archive'): - k = m.__loader__.archive - k = getattr(m, '__file__', None) - sysfiles.add(k) - - for filename in sysfiles | self.files: + if hasattr(m, '__loader__') and hasattr(m.__loader__, 'archive'): + f = m.__loader__.archive + else: + f = getattr(m, '__file__', None) + if f is not None and not os.path.isabs(f): + # ensure absolute paths so a os.chdir() in the app doesn't break me + f = os.path.normpath(os.path.join(_module__file__base, f)) + files.add(f) + return files + + def run(self): + """Reload the process if registered files have been modified.""" + for filename in self.sysfiles() | self.files: if filename: if filename.endswith('.pyc'): filename = filename[:-1] @@ -493,21 +642,26 @@ class ThreadManager(SimplePlugin): 'stop_thread' listeners for you when it stops. """ + threads = None + """A map of {thread ident: index number} pairs.""" + def __init__(self, bus): self.threads = {} SimplePlugin.__init__(self, bus) self.bus.listeners.setdefault('acquire_thread', set()) + self.bus.listeners.setdefault('start_thread', set()) self.bus.listeners.setdefault('release_thread', set()) - + self.bus.listeners.setdefault('stop_thread', set()) + def acquire_thread(self): """Run 'start_thread' listeners for the current thread. If the current thread has already been seen, any 'start_thread' listeners will not be run again. """ - thread_ident = threading._get_ident() + thread_ident = get_thread_ident() if thread_ident not in self.threads: - # We can't just use _get_ident as the thread ID + # We can't just use get_ident as the thread ID # because some platforms reuse thread ID's. i = len(self.threads) + 1 self.threads[thread_ident] = i @@ -515,14 +669,14 @@ class ThreadManager(SimplePlugin): def release_thread(self): """Release the current thread and run 'stop_thread' listeners.""" - thread_ident = threading._get_ident() + thread_ident = get_thread_ident() i = self.threads.pop(thread_ident, None) if i is not None: self.bus.publish('stop_thread', i) def stop(self): """Release all threads and run all 'stop_thread' listeners.""" - for thread_ident, i in self.threads.iteritems(): + for thread_ident, i in self.threads.items(): self.bus.publish('stop_thread', i) self.threads.clear() graceful = stop diff --git a/src/cherrypy/process/servers.py b/src/cherrypy/process/servers.py index da469bfad2..456da1e5f5 100644 --- a/src/cherrypy/process/servers.py +++ b/src/cherrypy/process/servers.py @@ -1,5 +1,117 @@ -"""Adapt an HTTP server.""" +""" +Starting in CherryPy 3.1, cherrypy.server is implemented as an +:ref:`Engine Plugin`. It's an instance of +:class:`cherrypy._cpserver.Server`, which is a subclass of +:class:`cherrypy.process.servers.ServerAdapter`. The ``ServerAdapter`` class +is designed to control other servers, as well. +Multiple servers/ports +====================== + +If you need to start more than one HTTP server (to serve on multiple ports, or +protocols, etc.), you can manually register each one and then start them all +with engine.start:: + + s1 = ServerAdapter(cherrypy.engine, MyWSGIServer(host='0.0.0.0', port=80)) + s2 = ServerAdapter(cherrypy.engine, another.HTTPServer(host='127.0.0.1', SSL=True)) + s1.subscribe() + s2.subscribe() + cherrypy.engine.start() + +.. index:: SCGI + +FastCGI/SCGI +============ + +There are also Flup\ **F**\ CGIServer and Flup\ **S**\ CGIServer classes in +:mod:`cherrypy.process.servers`. To start an fcgi server, for example, +wrap an instance of it in a ServerAdapter:: + + addr = ('0.0.0.0', 4000) + f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=addr) + s = servers.ServerAdapter(cherrypy.engine, httpserver=f, bind_addr=addr) + s.subscribe() + +The :doc:`cherryd` startup script will do the above for +you via its `-f` flag. +Note that you need to download and install `flup `_ +yourself, whether you use ``cherryd`` or not. + +.. _fastcgi: +.. index:: FastCGI + +FastCGI +------- + +A very simple setup lets your cherry run with FastCGI. +You just need the flup library, +plus a running Apache server (with ``mod_fastcgi``) or lighttpd server. + +CherryPy code +^^^^^^^^^^^^^ + +hello.py:: + + #!/usr/bin/python + import cherrypy + + class HelloWorld: + \"""Sample request handler class.\""" + def index(self): + return "Hello world!" + index.exposed = True + + cherrypy.tree.mount(HelloWorld()) + # CherryPy autoreload must be disabled for the flup server to work + cherrypy.config.update({'engine.autoreload_on':False}) + +Then run :doc:`/deployguide/cherryd` with the '-f' arg:: + + cherryd -c -d -f -i hello.py + +Apache +^^^^^^ + +At the top level in httpd.conf:: + + FastCgiIpcDir /tmp + FastCgiServer /path/to/cherry.fcgi -idle-timeout 120 -processes 4 + +And inside the relevant VirtualHost section:: + + # FastCGI config + AddHandler fastcgi-script .fcgi + ScriptAliasMatch (.*$) /path/to/cherry.fcgi$1 + +Lighttpd +^^^^^^^^ + +For `Lighttpd `_ you can follow these +instructions. Within ``lighttpd.conf`` make sure ``mod_fastcgi`` is +active within ``server.modules``. Then, within your ``$HTTP["host"]`` +directive, configure your fastcgi script like the following:: + + $HTTP["url"] =~ "" { + fastcgi.server = ( + "/" => ( + "script.fcgi" => ( + "bin-path" => "/path/to/your/script.fcgi", + "socket" => "/tmp/script.sock", + "check-local" => "disable", + "disable-time" => 1, + "min-procs" => 1, + "max-procs" => 1, # adjust as needed + ), + ), + ) + } # end of $HTTP["url"] =~ "^/" + +Please see `Lighttpd FastCGI Docs +`_ for an explanation +of the possible configuration options. +""" + +import sys import time @@ -34,7 +146,9 @@ class ServerAdapter(object): def start(self): """Start the HTTP server.""" - if isinstance(self.bind_addr, tuple): + if self.bind_addr is None: + on_what = "unknown interface (dynamic?)" + elif isinstance(self.bind_addr, tuple): host, port = self.bind_addr on_what = "%s:%s" % (host, port) else: @@ -71,17 +185,16 @@ class ServerAdapter(object): """ try: self.httpserver.start() - except KeyboardInterrupt, exc: + except KeyboardInterrupt: self.bus.log(" hit: shutting down HTTP server") - self.interrupt = exc + self.interrupt = sys.exc_info()[1] self.bus.exit() - except SystemExit, exc: + except SystemExit: self.bus.log("SystemExit raised: shutting down HTTP server") - self.interrupt = exc + self.interrupt = sys.exc_info()[1] self.bus.exit() raise except: - import sys self.interrupt = sys.exc_info()[1] self.bus.log("Error in HTTP server: shutting down", traceback=True, level=40) @@ -120,10 +233,40 @@ class ServerAdapter(object): self.start() +class FlupCGIServer(object): + """Adapter for a flup.server.cgi.WSGIServer.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + self.ready = False + + def start(self): + """Start the CGI server.""" + # We have to instantiate the server class here because its __init__ + # starts a threadpool. If we do it too early, daemonize won't work. + from flup.server.cgi import WSGIServer + + self.cgiserver = WSGIServer(*self.args, **self.kwargs) + self.ready = True + self.cgiserver.run() + + def stop(self): + """Stop the HTTP server.""" + self.ready = False + + class FlupFCGIServer(object): """Adapter for a flup.server.fcgi.WSGIServer.""" def __init__(self, *args, **kwargs): + if kwargs.get('bindAddress', None) is None: + import socket + if not hasattr(socket, 'fromfd'): + raise ValueError( + 'Dynamic FCGI server not available on this platform. ' + 'You must use a static or external one by providing a ' + 'legal bindAddress.') self.args = args self.kwargs = kwargs self.ready = False @@ -199,8 +342,9 @@ def client_host(server_host): if server_host == '0.0.0.0': # 0.0.0.0 is INADDR_ANY, which should answer on localhost. return '127.0.0.1' - if server_host == '::': + if server_host in ('::', '::0', '::0.0.0.0'): # :: is IN6ADDR_ANY, which should answer on localhost. + # ::0 and ::0.0.0.0 are non-canonical but common ways to write IN6ADDR_ANY. return '::1' return server_host @@ -215,8 +359,16 @@ def check_port(host, port, timeout=1.0): # AF_INET or AF_INET6 socket # Get the correct address family for our host (allows IPv6 addresses) - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM) + except socket.gaierror: + if ':' in host: + info = [(socket.AF_INET6, socket.SOCK_STREAM, 0, "", (host, port, 0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", (host, port))] + + for res in info: af, socktype, proto, canonname, sa = res s = None try: @@ -233,34 +385,43 @@ def check_port(host, port, timeout=1.0): if s: s.close() -def wait_for_free_port(host, port): + +# Feel free to increase these defaults on slow systems: +free_port_timeout = 0.2 # Changed by Kovid +occupied_port_timeout = 1.0 + +def wait_for_free_port(host, port, timeout=None): """Wait for the specified port to become free (drop requests).""" if not host: raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = free_port_timeout - for trial in xrange(50): + for trial in range(50): try: # we are expecting a free port, so reduce the timeout - check_port(host, port, timeout=0.2) # Changed by Kovid + check_port(host, port, timeout=timeout) except IOError: # Give the old server thread time to free the port. - time.sleep(0.2) # Changed by Kovid + time.sleep(timeout) else: return raise IOError("Port %r not free on %r" % (port, host)) -def wait_for_occupied_port(host, port): +def wait_for_occupied_port(host, port, timeout=None): """Wait for the specified port to become active (receive requests).""" if not host: raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = occupied_port_timeout - for trial in xrange(50): + for trial in range(50): try: - check_port(host, port) + check_port(host, port, timeout=timeout) except IOError: return else: - time.sleep(.1) + time.sleep(timeout) raise IOError("Port %r not bound on %r" % (port, host)) diff --git a/src/cherrypy/process/win32.py b/src/cherrypy/process/win32.py index 0ca43d5e9b..83f99a5d46 100644 --- a/src/cherrypy/process/win32.py +++ b/src/cherrypy/process/win32.py @@ -1,7 +1,6 @@ """Windows service. Requires pywin32.""" import os -import thread import win32api import win32con import win32event @@ -84,7 +83,7 @@ class Win32Bus(wspbus.Bus): return self.events[state] except KeyError: event = win32event.CreateEvent(None, 0, 0, - u"WSPBus %s Event (pid=%r)" % + "WSPBus %s Event (pid=%r)" % (state.name, os.getpid())) self.events[state] = event return event @@ -97,7 +96,7 @@ class Win32Bus(wspbus.Bus): win32event.PulseEvent(event) state = property(_get_state, _set_state) - def wait(self, state, interval=0.1): + def wait(self, state, interval=0.1, channel=None): """Wait for the given state(s), KeyboardInterrupt or SystemExit. Since this class uses native win32event objects, the interval @@ -128,7 +127,7 @@ class _ControlCodes(dict): def key_for(self, obj): """For the given value, return its corresponding key.""" - for key, val in self.iteritems(): + for key, val in self.items(): if val is obj: return key raise ValueError("The given object could not be found: %r" % obj) diff --git a/src/cherrypy/process/wspbus.py b/src/cherrypy/process/wspbus.py index 5bbcb8c629..6ef768dcbb 100644 --- a/src/cherrypy/process/wspbus.py +++ b/src/cherrypy/process/wspbus.py @@ -20,24 +20,24 @@ autoreload component. Ideally, a Bus object will be flexible enough to be useful in a variety of invocation scenarios: - 1. The deployer starts a site from the command line via a framework- - neutral deployment script; applications from multiple frameworks - are mixed in a single site. Command-line arguments and configuration - files are used to define site-wide components such as the HTTP server, - WSGI component graph, autoreload behavior, signal handling, etc. + 1. The deployer starts a site from the command line via a + framework-neutral deployment script; applications from multiple frameworks + are mixed in a single site. Command-line arguments and configuration + files are used to define site-wide components such as the HTTP server, + WSGI component graph, autoreload behavior, signal handling, etc. 2. The deployer starts a site via some other process, such as Apache; - applications from multiple frameworks are mixed in a single site. - Autoreload and signal handling (from Python at least) are disabled. + applications from multiple frameworks are mixed in a single site. + Autoreload and signal handling (from Python at least) are disabled. 3. The deployer starts a site via a framework-specific mechanism; - for example, when running tests, exploring tutorials, or deploying - single applications from a single framework. The framework controls - which site-wide components are enabled as it sees fit. + for example, when running tests, exploring tutorials, or deploying + single applications from a single framework. The framework controls + which site-wide components are enabled as it sees fit. The Bus object in this package uses topic-based publish-subscribe messaging to accomplish all this. A few topic channels are built in -('start', 'stop', 'exit', and 'graceful'). Frameworks and site containers -are free to define their own. If a message is sent to a channel that has -not been defined or has no listeners, there is no effect. +('start', 'stop', 'exit', 'graceful', 'log', and 'main'). Frameworks and +site containers are free to define their own. If a message is sent to a +channel that has not been defined or has no listeners, there is no effect. In general, there should only ever be a single Bus object per process. Frameworks and site containers share a single Bus object by publishing @@ -46,7 +46,7 @@ messages and subscribing listeners. The Bus object works as a finite state machine which models the current state of the process. Bus methods move it from one state to another; those methods then publish to subscribed listeners on the channel for -the new state. +the new state.:: O | @@ -62,16 +62,49 @@ the new state. import atexit import os -try: - set -except NameError: - from sets import Set as set import sys import threading import time import traceback as _traceback import warnings +from cherrypy._cpcompat import set + +# Here I save the value of os.getcwd(), which, if I am imported early enough, +# will be the directory from which the startup script was run. This is needed +# by _do_execv(), to change back to the original directory before execv()ing a +# new process. This is a defense against the application having changed the +# current working directory (which could make sys.executable "not found" if +# sys.executable is a relative-path, and/or cause other problems). +_startup_cwd = os.getcwd() + +class ChannelFailures(Exception): + """Exception raised when errors occur in a listener during Bus.publish().""" + delimiter = '\n' + + def __init__(self, *args, **kwargs): + # Don't use 'super' here; Exceptions are old-style in Py2.4 + # See http://www.cherrypy.org/ticket/959 + Exception.__init__(self, *args, **kwargs) + self._exceptions = list() + + def handle_exception(self): + """Append the current exception to self.""" + self._exceptions.append(sys.exc_info()[1]) + + def get_instances(self): + """Return a list of seen exception instances.""" + return self._exceptions[:] + + def __str__(self): + exception_strings = map(repr, self.get_instances()) + return self.delimiter.join(exception_strings) + + __repr__ = __str__ + + def __bool__(self): + return bool(self._exceptions) + __nonzero__ = __bool__ # Use a flag to indicate the state of the bus. class _StateEnum(object): @@ -92,6 +125,17 @@ states.STOPPING = states.State() states.EXITING = states.State() +try: + import fcntl +except ImportError: + max_files = 0 +else: + try: + max_files = os.sysconf('SC_OPEN_MAX') + except AttributeError: + max_files = 1024 + + class Bus(object): """Process state-machine and messenger for HTTP site deployment. @@ -105,13 +149,14 @@ class Bus(object): states = states state = states.STOPPED execv = False + max_cloexec_files = max_files def __init__(self): self.execv = False self.state = states.STOPPED self.listeners = dict( [(channel, set()) for channel - in ('start', 'stop', 'exit', 'graceful', 'log')]) + in ('start', 'stop', 'exit', 'graceful', 'log', 'main')]) self._priorities = {} def subscribe(self, channel, callback, priority=None): @@ -136,24 +181,30 @@ class Bus(object): if channel not in self.listeners: return [] - exc = None + exc = ChannelFailures() output = [] items = [(self._priorities[(channel, listener)], listener) for listener in self.listeners[channel]] - items.sort() + try: + items.sort(key=lambda item: item[0]) + except TypeError: + # Python 2.3 had no 'key' arg, but that doesn't matter + # since it could sort dissimilar types just fine. + items.sort() for priority, listener in items: try: output.append(listener(*args, **kwargs)) except KeyboardInterrupt: raise - except SystemExit, e: + except SystemExit: + e = sys.exc_info()[1] # If we have previous errors ensure the exit code is non-zero if exc and e.code == 0: e.code = 1 raise except: - exc = sys.exc_info()[1] + exc.handle_exception() if channel == 'log': # Assume any further messages to 'log' will fail. pass @@ -161,7 +212,7 @@ class Bus(object): self.log("Error in %r listener %r" % (channel, listener), level=40, traceback=True) if exc: - raise + raise exc return output def _clean_exit(self): @@ -189,16 +240,18 @@ class Bus(object): except: self.log("Shutting down due to error in start listener:", level=40, traceback=True) - e_info = sys.exc_info() + e_info = sys.exc_info()[1] try: self.exit() except: # Any stop/exit errors will be logged inside publish(). pass - raise e_info[0], e_info[1], e_info[2] + # Re-raise the original error + raise e_info def exit(self): """Stop all services and prepare to exit the process.""" + exitstate = self.state try: self.stop() @@ -214,6 +267,13 @@ class Bus(object): # can't just let exceptions propagate out unhandled. # Assume it's been logged and just die. os._exit(70) # EX_SOFTWARE + + if exitstate == states.STARTING: + # exit() was called before start() finished, possibly due to + # Ctrl-C because a start listener got stuck. In this case, + # we could get stuck in a loop where Ctrl-C never exits the + # process, so we just call os.exit here. + os._exit(70) # EX_SOFTWARE def restart(self): """Restart the process (may close connections). @@ -239,7 +299,7 @@ class Bus(object): thread perform the actual execv call (required on some platforms). """ try: - self.wait(states.EXITING, interval=interval) + self.wait(states.EXITING, interval=interval, channel='main') except (KeyboardInterrupt, IOError): # The time.sleep call might raise # "IOError: [Errno 4] Interrupted function call" on KBInt. @@ -265,13 +325,14 @@ class Bus(object): else: d = t.isDaemon() if not d: + self.log("Waiting for thread %s." % t.getName()) t.join() if self.execv: self._do_execv() - def wait(self, state, interval=0.1): - """Wait for the given state(s).""" + def wait(self, state, interval=0.1, channel=None): + """Poll for the given state(s) at intervals; publish to channel.""" if isinstance(state, (tuple, list)): states = state else: @@ -280,6 +341,7 @@ class Bus(object): def _wait(): while self.state not in states: time.sleep(interval) + self.publish(channel) # From http://psyco.sourceforge.net/psycoguide/bugs.html: # "The compiled machine code does not include the regular polling @@ -302,11 +364,37 @@ class Bus(object): """ args = sys.argv[:] self.log('Re-spawning %s' % ' '.join(args)) - args.insert(0, sys.executable) - if sys.platform == 'win32': - args = ['"%s"' % arg for arg in args] - os.execv(sys.executable, args) + if sys.platform[:4] == 'java': + from _systemrestart import SystemRestart + raise SystemRestart + else: + args.insert(0, sys.executable) + if sys.platform == 'win32': + args = ['"%s"' % arg for arg in args] + + os.chdir(_startup_cwd) + if self.max_cloexec_files: + self._set_cloexec() + os.execv(sys.executable, args) + + def _set_cloexec(self): + """Set the CLOEXEC flag on all open files (except stdin/out/err). + + If self.max_cloexec_files is an integer (the default), then on + platforms which support it, it represents the max open files setting + for the operating system. This function will be called just before + the process is restarted via os.execv() to prevent open files + from persisting into the new process. + + Set self.max_cloexec_files to 0 to disable this behavior. + """ + for fd in range(3, self.max_cloexec_files): # skip stdin/out/err + try: + flags = fcntl.fcntl(fd, fcntl.F_GETFD) + except IOError: + continue + fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) def stop(self): """Stop all services.""" @@ -338,8 +426,7 @@ class Bus(object): def log(self, msg="", level=20, traceback=False): """Log the given message. Append the last traceback if requested.""" if traceback: - exc = sys.exc_info() - msg += "\n" + "".join(_traceback.format_exception(*exc)) + msg += "\n" + "".join(_traceback.format_exception(*sys.exc_info())) self.publish('log', msg, level) bus = Bus() diff --git a/src/cherrypy/scaffold/__init__.py b/src/cherrypy/scaffold/__init__.py index f50cc213d3..00964ac5f6 100644 --- a/src/cherrypy/scaffold/__init__.py +++ b/src/cherrypy/scaffold/__init__.py @@ -8,7 +8,7 @@ then tweak as desired. Even before any tweaking, this should serve a few demonstration pages. Change to this directory and run: - python cherrypy\cherryd -c cherrypy\scaffold\site.conf + ../cherryd -c site.conf """ diff --git a/src/cherrypy/scaffold/apache-fcgi.conf b/src/cherrypy/scaffold/apache-fcgi.conf new file mode 100644 index 0000000000..922398eaf8 --- /dev/null +++ b/src/cherrypy/scaffold/apache-fcgi.conf @@ -0,0 +1,22 @@ +# Apache2 server conf file for using CherryPy with mod_fcgid. + +# This doesn't have to be "C:/", but it has to be a directory somewhere, and +# MUST match the directory used in the FastCgiExternalServer directive, below. +DocumentRoot "C:/" + +ServerName 127.0.0.1 +Listen 80 +LoadModule fastcgi_module modules/mod_fastcgi.dll +LoadModule rewrite_module modules/mod_rewrite.so + +Options ExecCGI +SetHandler fastcgi-script +RewriteEngine On +# Send requests for any URI to our fastcgi handler. +RewriteRule ^(.*)$ /fastcgi.pyc [L] + +# The FastCgiExternalServer directive defines filename as an external FastCGI application. +# If filename does not begin with a slash (/) then it is assumed to be relative to the ServerRoot. +# The filename does not have to exist in the local filesystem. URIs that Apache resolves to this +# filename will be handled by this external FastCGI application. +FastCgiExternalServer "C:/fastcgi.pyc" -host 127.0.0.1:8088 \ No newline at end of file diff --git a/src/cherrypy/scaffold/site.conf b/src/cherrypy/scaffold/site.conf index 6fc8f4ec34..6ed3898373 100644 --- a/src/cherrypy/scaffold/site.conf +++ b/src/cherrypy/scaffold/site.conf @@ -5,4 +5,10 @@ server.socket_host: "0.0.0.0" server.socket_port: 8088 -tree.myapp: cherrypy.Application(scaffold.root, "/", "cherrypy/scaffold/example.conf") +# Uncomment the following lines to run on HTTPS at the same time +#server.2.socket_host: "0.0.0.0" +#server.2.socket_port: 8433 +#server.2.ssl_certificate: '../test/test.pem' +#server.2.ssl_private_key: '../test/test.pem' + +tree.myapp: cherrypy.Application(scaffold.root, "/", "example.conf") diff --git a/src/cherrypy/wsgiserver/__init__.py b/src/cherrypy/wsgiserver/__init__.py index c380e18b05..ee6190fee1 100644 --- a/src/cherrypy/wsgiserver/__init__.py +++ b/src/cherrypy/wsgiserver/__init__.py @@ -1,1794 +1,14 @@ -"""A high-speed, production ready, thread pooled, generic WSGI server. - -Simplest example on how to use this module directly -(without using CherryPy's application machinery): - - from cherrypy import wsgiserver - - def my_crazy_app(environ, start_response): - status = '200 OK' - response_headers = [('Content-type','text/plain')] - start_response(status, response_headers) - return ['Hello world!\n'] - - server = wsgiserver.CherryPyWSGIServer( - ('0.0.0.0', 8070), my_crazy_app, - server_name='www.cherrypy.example') - -The CherryPy WSGI server can serve as many WSGI applications -as you want in one instance by using a WSGIPathInfoDispatcher: - - d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) - server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) - -Want SSL support? Just set these attributes: - - server.ssl_certificate = - server.ssl_private_key = - - if __name__ == '__main__': - try: - server.start() - except KeyboardInterrupt: - server.stop() - -This won't call the CherryPy engine (application side) at all, only the -WSGI server, which is independant from the rest of CherryPy. Don't -let the name "CherryPyWSGIServer" throw you; the name merely reflects -its origin, not its coupling. - -For those of you wanting to understand internals of this module, here's the -basic call flow. The server's listening thread runs a very tight loop, -sticking incoming connections onto a Queue: - - server = CherryPyWSGIServer(...) - server.start() - while True: - tick() - # This blocks until a request comes in: - child = socket.accept() - conn = HTTPConnection(child, ...) - server.requests.put(conn) - -Worker threads are kept in a pool and poll the Queue, popping off and then -handling each connection in turn. Each connection can consist of an arbitrary -number of requests and their responses, so we run a nested loop: - - while True: - conn = server.requests.get() - conn.communicate() - -> while True: - req = HTTPRequest(...) - req.parse_request() - -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" - req.rfile.readline() - req.read_headers() - req.respond() - -> response = wsgi_app(...) - try: - for chunk in response: - if chunk: - req.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - if req.close_connection: - return -""" - - -import base64 -import os -import Queue -import re -quoted_slash = re.compile("(?i)%2F") -import rfc822 -import socket -try: - import cStringIO as StringIO -except ImportError: - import StringIO - -_fileobject_uses_str_type = isinstance(socket._fileobject(None)._rbuf, basestring) +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] import sys -import threading -import time -import traceback -from urllib import unquote -from urlparse import urlparse -import warnings - -try: - from OpenSSL import SSL - from OpenSSL import crypto -except ImportError: - SSL = None - -import errno - -def plat_specific_errors(*errnames): - """Return error numbers for all errors in errnames on this platform. - - The 'errno' module contains different global constants depending on - the specific platform (OS). This function will return the list of - numeric values for a given list of potential names. - """ - errno_names = dir(errno) - nums = [getattr(errno, k) for k in errnames if k in errno_names] - # de-dupe the list - return dict.fromkeys(nums).keys() - -socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") - -socket_errors_to_ignore = plat_specific_errors( - "EPIPE", - "EBADF", "WSAEBADF", - "ENOTSOCK", "WSAENOTSOCK", - "ETIMEDOUT", "WSAETIMEDOUT", - "ECONNREFUSED", "WSAECONNREFUSED", - "ECONNRESET", "WSAECONNRESET", - "ECONNABORTED", "WSAECONNABORTED", - "ENETRESET", "WSAENETRESET", - "EHOSTDOWN", "EHOSTUNREACH", - ) -socket_errors_to_ignore.append("timed out") - -socket_errors_nonblocking = plat_specific_errors( - 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') - -comma_separated_headers = ['ACCEPT', 'ACCEPT-CHARSET', 'ACCEPT-ENCODING', - 'ACCEPT-LANGUAGE', 'ACCEPT-RANGES', 'ALLOW', 'CACHE-CONTROL', - 'CONNECTION', 'CONTENT-ENCODING', 'CONTENT-LANGUAGE', 'EXPECT', - 'IF-MATCH', 'IF-NONE-MATCH', 'PRAGMA', 'PROXY-AUTHENTICATE', 'TE', - 'TRAILER', 'TRANSFER-ENCODING', 'UPGRADE', 'VARY', 'VIA', 'WARNING', - 'WWW-AUTHENTICATE'] - - -class WSGIPathInfoDispatcher(object): - """A WSGI dispatcher for dispatch based on the PATH_INFO. - - apps: a dict or list of (path_prefix, app) pairs. - """ - - def __init__(self, apps): - try: - apps = apps.items() - except AttributeError: - pass - - # Sort the apps by len(path), descending - apps.sort() - apps.reverse() - - # The path_prefix strings must start, but not end, with a slash. - # Use "" instead of "/". - self.apps = [(p.rstrip("/"), a) for p, a in apps] - - def __call__(self, environ, start_response): - path = environ["PATH_INFO"] or "/" - for p, app in self.apps: - # The apps list should be sorted by length, descending. - if path.startswith(p + "/") or path == p: - environ = environ.copy() - environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p - environ["PATH_INFO"] = path[len(p):] - return app(environ, start_response) - - start_response('404 Not Found', [('Content-Type', 'text/plain'), - ('Content-Length', '0')]) - return [''] - - -class MaxSizeExceeded(Exception): - pass - -class SizeCheckWrapper(object): - """Wraps a file-like object, raising MaxSizeExceeded if too large.""" - - def __init__(self, rfile, maxlen): - self.rfile = rfile - self.maxlen = maxlen - self.bytes_read = 0 - - def _check_length(self): - if self.maxlen and self.bytes_read > self.maxlen: - raise MaxSizeExceeded() - - def read(self, size=None): - data = self.rfile.read(size) - self.bytes_read += len(data) - self._check_length() - return data - - def readline(self, size=None): - if size is not None: - data = self.rfile.readline(size) - self.bytes_read += len(data) - self._check_length() - return data - - # User didn't specify a size ... - # We read the line in chunks to make sure it's not a 100MB line ! - res = [] - while True: - data = self.rfile.readline(256) - self.bytes_read += len(data) - self._check_length() - res.append(data) - # See http://www.cherrypy.org/ticket/421 - if len(data) < 256 or data[-1:] == "\n": - return ''.join(res) - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def next(self): - data = self.rfile.next() - self.bytes_read += len(data) - self._check_length() - return data - - -class HTTPRequest(object): - """An HTTP Request (and response). - - A single HTTP connection may consist of multiple request/response pairs. - - send: the 'send' method from the connection's socket object. - wsgi_app: the WSGI application to call. - environ: a partial WSGI environ (server and connection entries). - The caller MUST set the following entries: - * All wsgi.* entries, including .input - * SERVER_NAME and SERVER_PORT - * Any SSL_* entries - * Any custom entries like REMOTE_ADDR and REMOTE_PORT - * SERVER_SOFTWARE: the value to write in the "Server" response header. - * ACTUAL_SERVER_PROTOCOL: the value to write in the Status-Line of - the response. From RFC 2145: "An HTTP server SHOULD send a - response version equal to the highest version for which the - server is at least conditionally compliant, and whose major - version is less than or equal to the one received in the - request. An HTTP server MUST NOT send a version for which - it is not at least conditionally compliant." - - outheaders: a list of header tuples to write in the response. - ready: when True, the request has been parsed and is ready to begin - generating the response. When False, signals the calling Connection - that the response should not be generated and the connection should - close. - close_connection: signals the calling Connection that the request - should close. This does not imply an error! The client and/or - server may each request that the connection be closed. - chunked_write: if True, output will be encoded with the "chunked" - transfer-coding. This value is set automatically inside - send_headers. - """ - - max_request_header_size = 0 - max_request_body_size = 0 - - def __init__(self, wfile, environ, wsgi_app): - self.rfile = environ['wsgi.input'] - self.wfile = wfile - self.environ = environ.copy() - self.wsgi_app = wsgi_app - - self.ready = False - self.started_response = False - self.status = "" - self.outheaders = [] - self.sent_headers = False - self.close_connection = False - self.chunked_write = False - - def parse_request(self): - """Parse the next HTTP request start-line and message-headers.""" - self.rfile.maxlen = self.max_request_header_size - self.rfile.bytes_read = 0 - - try: - self._parse_request() - except MaxSizeExceeded: - self.simple_response("413 Request Entity Too Large") - return - - def _parse_request(self): - # HTTP/1.1 connections are persistent by default. If a client - # requests a page, then idles (leaves the connection open), - # then rfile.readline() will raise socket.error("timed out"). - # Note that it does this based on the value given to settimeout(), - # and doesn't need the client to request or acknowledge the close - # (although your TCP stack might suffer for it: cf Apache's history - # with FIN_WAIT_2). - request_line = self.rfile.readline() - if not request_line: - # Force self.ready = False so the connection will close. - self.ready = False - return - - if request_line == "\r\n": - # RFC 2616 sec 4.1: "...if the server is reading the protocol - # stream at the beginning of a message and receives a CRLF - # first, it should ignore the CRLF." - # But only ignore one leading line! else we enable a DoS. - request_line = self.rfile.readline() - if not request_line: - self.ready = False - return - - environ = self.environ - - try: - method, path, req_protocol = request_line.strip().split(" ", 2) - except ValueError: - self.simple_response(400, "Malformed Request-Line") - return - - environ["REQUEST_METHOD"] = method - - # path may be an abs_path (including "http://host.domain.tld"); - scheme, location, path, params, qs, frag = urlparse(path) - - if frag: - self.simple_response("400 Bad Request", - "Illegal #fragment in Request-URI.") - return - - if scheme: - environ["wsgi.url_scheme"] = scheme - if params: - path = path + ";" + params - - environ["SCRIPT_NAME"] = "" - - # Unquote the path+params (e.g. "/this%20path" -> "this path"). - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 - # - # But note that "...a URI must be separated into its components - # before the escaped characters within those components can be - # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 - atoms = [unquote(x) for x in quoted_slash.split(path)] - path = "%2F".join(atoms) - environ["PATH_INFO"] = path - - # Note that, like wsgiref and most other WSGI servers, - # we unquote the path but not the query string. - environ["QUERY_STRING"] = qs - - # Compare request and server HTTP protocol versions, in case our - # server does not support the requested protocol. Limit our output - # to min(req, server). We want the following output: - # request server actual written supported response - # protocol protocol response protocol feature set - # a 1.0 1.0 1.0 1.0 - # b 1.0 1.1 1.1 1.0 - # c 1.1 1.0 1.0 1.0 - # d 1.1 1.1 1.1 1.1 - # Notice that, in (b), the response will be "HTTP/1.1" even though - # the client only understands 1.0. RFC 2616 10.5.6 says we should - # only return 505 if the _major_ version is different. - rp = int(req_protocol[5]), int(req_protocol[7]) - server_protocol = environ["ACTUAL_SERVER_PROTOCOL"] - sp = int(server_protocol[5]), int(server_protocol[7]) - if sp[0] != rp[0]: - self.simple_response("505 HTTP Version Not Supported") - return - # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. - environ["SERVER_PROTOCOL"] = req_protocol - self.response_protocol = "HTTP/%s.%s" % min(rp, sp) - - # If the Request-URI was an absoluteURI, use its location atom. - if location: - environ["SERVER_NAME"] = location - - # then all the http headers - try: - self.read_headers() - except ValueError, ex: - self.simple_response("400 Bad Request", repr(ex.args)) - return - - mrbs = self.max_request_body_size - if mrbs and int(environ.get("CONTENT_LENGTH", 0)) > mrbs: - self.simple_response("413 Request Entity Too Large") - return - - # Persistent connection support - if self.response_protocol == "HTTP/1.1": - # Both server and client are HTTP/1.1 - if environ.get("HTTP_CONNECTION", "") == "close": - self.close_connection = True - else: - # Either the server or client (or both) are HTTP/1.0 - if environ.get("HTTP_CONNECTION", "") != "Keep-Alive": - self.close_connection = True - - # Transfer-Encoding support - te = None - if self.response_protocol == "HTTP/1.1": - te = environ.get("HTTP_TRANSFER_ENCODING") - if te: - te = [x.strip().lower() for x in te.split(",") if x.strip()] - - self.chunked_read = False - - if te: - for enc in te: - if enc == "chunked": - self.chunked_read = True - else: - # Note that, even if we see "chunked", we must reject - # if there is an extension we don't recognize. - self.simple_response("501 Unimplemented") - self.close_connection = True - return - - # From PEP 333: - # "Servers and gateways that implement HTTP 1.1 must provide - # transparent support for HTTP 1.1's "expect/continue" mechanism. - # This may be done in any of several ways: - # 1. Respond to requests containing an Expect: 100-continue request - # with an immediate "100 Continue" response, and proceed normally. - # 2. Proceed with the request normally, but provide the application - # with a wsgi.input stream that will send the "100 Continue" - # response if/when the application first attempts to read from - # the input stream. The read request must then remain blocked - # until the client responds. - # 3. Wait until the client decides that the server does not support - # expect/continue, and sends the request body on its own. - # (This is suboptimal, and is not recommended.) - # - # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, - # but it seems like it would be a big slowdown for such a rare case. - if environ.get("HTTP_EXPECT", "") == "100-continue": - self.simple_response(100) - - self.ready = True - - def read_headers(self): - """Read header lines from the incoming stream.""" - environ = self.environ - - while True: - line = self.rfile.readline() - if not line: - # No more data--illegal end of headers - raise ValueError("Illegal end of headers.") - - if line == '\r\n': - # Normal end of headers - break - - if line[0] in ' \t': - # It's a continuation line. - v = line.strip() - else: - k, v = line.split(":", 1) - k, v = k.strip().upper(), v.strip() - envname = "HTTP_" + k.replace("-", "_") - - if k in comma_separated_headers: - existing = environ.get(envname) - if existing: - v = ", ".join((existing, v)) - environ[envname] = v - - ct = environ.pop("HTTP_CONTENT_TYPE", None) - if ct is not None: - environ["CONTENT_TYPE"] = ct - cl = environ.pop("HTTP_CONTENT_LENGTH", None) - if cl is not None: - environ["CONTENT_LENGTH"] = cl - - def decode_chunked(self): - """Decode the 'chunked' transfer coding.""" - cl = 0 - data = StringIO.StringIO() - while True: - line = self.rfile.readline().strip().split(";", 1) - chunk_size = int(line.pop(0), 16) - if chunk_size <= 0: - break -## if line: chunk_extension = line[0] - cl += chunk_size - data.write(self.rfile.read(chunk_size)) - crlf = self.rfile.read(2) - if crlf != "\r\n": - self.simple_response("400 Bad Request", - "Bad chunked transfer coding " - "(expected '\\r\\n', got %r)" % crlf) - return - - # Grab any trailer headers - self.read_headers() - - data.seek(0) - self.environ["wsgi.input"] = data - self.environ["CONTENT_LENGTH"] = str(cl) or "" - return True - - def respond(self): - """Call the appropriate WSGI app and write its iterable output.""" - # Set rfile.maxlen to ensure we don't read past Content-Length. - # This will also be used to read the entire request body if errors - # are raised before the app can read the body. - if self.chunked_read: - # If chunked, Content-Length will be 0. - self.rfile.maxlen = self.max_request_body_size - else: - cl = int(self.environ.get("CONTENT_LENGTH", 0)) - if self.max_request_body_size: - self.rfile.maxlen = min(cl, self.max_request_body_size) - else: - self.rfile.maxlen = cl - self.rfile.bytes_read = 0 - - try: - self._respond() - except MaxSizeExceeded: - if not self.sent_headers: - self.simple_response("413 Request Entity Too Large") - return - - def _respond(self): - if self.chunked_read: - if not self.decode_chunked(): - self.close_connection = True - return - - response = self.wsgi_app(self.environ, self.start_response) - try: - for chunk in response: - # "The start_response callable must not actually transmit - # the response headers. Instead, it must store them for the - # server or gateway to transmit only after the first - # iteration of the application return value that yields - # a NON-EMPTY string, or upon the application's first - # invocation of the write() callable." (PEP 333) - if chunk: - self.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - - if (self.ready and not self.sent_headers): - self.sent_headers = True - self.send_headers() - if self.chunked_write: - self.wfile.sendall("0\r\n\r\n") - - def simple_response(self, status, msg=""): - """Write a simple response back to the client.""" - status = str(status) - buf = ["%s %s\r\n" % (self.environ['ACTUAL_SERVER_PROTOCOL'], status), - "Content-Length: %s\r\n" % len(msg), - "Content-Type: text/plain\r\n"] - - if status[:3] == "413" and self.response_protocol == 'HTTP/1.1': - # Request Entity Too Large - self.close_connection = True - buf.append("Connection: close\r\n") - - buf.append("\r\n") - if msg: - buf.append(msg) - - try: - self.wfile.sendall("".join(buf)) - except socket.error, x: - if x.args[0] not in socket_errors_to_ignore: - raise - - def start_response(self, status, headers, exc_info = None): - """WSGI callable to begin the HTTP response.""" - # "The application may call start_response more than once, - # if and only if the exc_info argument is provided." - if self.started_response and not exc_info: - raise AssertionError("WSGI start_response called a second " - "time with no exc_info.") - - # "if exc_info is provided, and the HTTP headers have already been - # sent, start_response must raise an error, and should raise the - # exc_info tuple." - if self.sent_headers: - try: - raise exc_info[0], exc_info[1], exc_info[2] - finally: - exc_info = None - - self.started_response = True - self.status = status - self.outheaders.extend(headers) - return self.write - - def write(self, chunk): - """WSGI callable to write unbuffered data to the client. - - This method is also used internally by start_response (to write - data from the iterable returned by the WSGI application). - """ - if not self.started_response: - raise AssertionError("WSGI write called before start_response.") - - if not self.sent_headers: - self.sent_headers = True - self.send_headers() - - if self.chunked_write and chunk: - buf = [hex(len(chunk))[2:], "\r\n", chunk, "\r\n"] - self.wfile.sendall("".join(buf)) - else: - self.wfile.sendall(chunk) - - def send_headers(self): - """Assert, process, and send the HTTP response message-headers.""" - hkeys = [key.lower() for key, value in self.outheaders] - status = int(self.status[:3]) - - if status == 413: - # Request Entity Too Large. Close conn to avoid garbage. - self.close_connection = True - elif "content-length" not in hkeys: - # "All 1xx (informational), 204 (no content), - # and 304 (not modified) responses MUST NOT - # include a message-body." So no point chunking. - if status < 200 or status in (204, 205, 304): - pass - else: - if (self.response_protocol == 'HTTP/1.1' - and self.environ["REQUEST_METHOD"] != 'HEAD'): - # Use the chunked transfer-coding - self.chunked_write = True - self.outheaders.append(("Transfer-Encoding", "chunked")) - else: - # Closing the conn is the only way to determine len. - self.close_connection = True - - if "connection" not in hkeys: - if self.response_protocol == 'HTTP/1.1': - # Both server and client are HTTP/1.1 or better - if self.close_connection: - self.outheaders.append(("Connection", "close")) - else: - # Server and/or client are HTTP/1.0 - if not self.close_connection: - self.outheaders.append(("Connection", "Keep-Alive")) - - if (not self.close_connection) and (not self.chunked_read): - # Read any remaining request body data on the socket. - # "If an origin server receives a request that does not include an - # Expect request-header field with the "100-continue" expectation, - # the request includes a request body, and the server responds - # with a final status code before reading the entire request body - # from the transport connection, then the server SHOULD NOT close - # the transport connection until it has read the entire request, - # or until the client closes the connection. Otherwise, the client - # might not reliably receive the response message. However, this - # requirement is not be construed as preventing a server from - # defending itself against denial-of-service attacks, or from - # badly broken client implementations." - size = self.rfile.maxlen - self.rfile.bytes_read - if size > 0: - self.rfile.read(size) - - if "date" not in hkeys: - self.outheaders.append(("Date", rfc822.formatdate())) - - if "server" not in hkeys: - self.outheaders.append(("Server", self.environ['SERVER_SOFTWARE'])) - - buf = [self.environ['ACTUAL_SERVER_PROTOCOL'], " ", self.status, "\r\n"] - try: - buf += [k + ": " + v + "\r\n" for k, v in self.outheaders] - except TypeError: - if not isinstance(k, str): - raise TypeError("WSGI response header key %r is not a string.") - if not isinstance(v, str): - raise TypeError("WSGI response header value %r is not a string.") - else: - raise - buf.append("\r\n") - self.wfile.sendall("".join(buf)) - - -class NoSSLError(Exception): - """Exception raised when a client speaks HTTP to an HTTPS socket.""" - pass - - -class FatalSSLAlert(Exception): - """Exception raised when the SSL implementation signals a fatal alert.""" - pass - - -if not _fileobject_uses_str_type: - class CP_fileobject(socket._fileobject): - """Faux file object attached to a socket object.""" - - def sendall(self, data): - """Sendall for non-blocking sockets.""" - while data: - try: - bytes_sent = self.send(data) - data = data[bytes_sent:] - except socket.error, e: - if e.args[0] not in socket_errors_nonblocking: - raise - - def send(self, data): - return self._sock.send(data) - - def flush(self): - if self._wbuf: - buffer = "".join(self._wbuf) - self._wbuf = [] - self.sendall(buffer) - - def recv(self, size): - while True: - try: - return self._sock.recv(size) - except socket.error, e: - if (e.args[0] not in socket_errors_nonblocking - and e.args[0] not in socket_error_eintr): - raise - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. - # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(rbufsize) - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - data = self.recv(left) - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self.recv - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(self._rbufsize) - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(self._rbufsize) - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - +if sys.version_info < (3, 0): + from wsgiserver2 import * else: - class CP_fileobject(socket._fileobject): - """Faux file object attached to a socket object.""" - - def sendall(self, data): - """Sendall for non-blocking sockets.""" - while data: - try: - bytes_sent = self.send(data) - data = data[bytes_sent:] - except socket.error, e: - if e.args[0] not in socket_errors_nonblocking: - raise - - def send(self, data): - return self._sock.send(data) - - def flush(self): - if self._wbuf: - buffer = "".join(self._wbuf) - self._wbuf = [] - self.sendall(buffer) - - def recv(self, size): - while True: - try: - return self._sock.recv(size) - except socket.error, e: - if (e.args[0] not in socket_errors_nonblocking - and e.args[0] not in socket_error_eintr): - raise - - def read(self, size=-1): - if size < 0: - # Read until EOF - buffers = [self._rbuf] - self._rbuf = "" - if self._rbufsize <= 1: - recv_size = self.default_bufsize - else: - recv_size = self._rbufsize - - while True: - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - return "".join(buffers) - else: - # Read until size bytes or EOF seen, whichever comes first - data = self._rbuf - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - left = size - buf_len - recv_size = max(self._rbufsize, left) - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - def readline(self, size=-1): - data = self._rbuf - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - assert data == "" - buffers = [] - while data != "\n": - data = self.recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - return "".join(buffers) - else: - # Read until size bytes or \n or EOF seen, whichever comes first - nl = data.find('\n', 0, size) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - left = size - buf_len - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - -class SSL_fileobject(CP_fileobject): - """SSL file object attached to a socket object.""" - - ssl_timeout = 3 - ssl_retry = .01 - - def _safe_call(self, is_reader, call, *args, **kwargs): - """Wrap the given call with SSL error-trapping. - - is_reader: if False EOF errors will be raised. If True, EOF errors - will return "" (to emulate normal sockets). - """ - start = time.time() - while True: - try: - return call(*args, **kwargs) - except SSL.WantReadError: - # Sleep and try again. This is dangerous, because it means - # the rest of the stack has no way of differentiating - # between a "new handshake" error and "client dropped". - # Note this isn't an endless loop: there's a timeout below. - time.sleep(self.ssl_retry) - except SSL.WantWriteError: - time.sleep(self.ssl_retry) - except SSL.SysCallError, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - errnum = e.args[0] - if is_reader and errnum in socket_errors_to_ignore: - return "" - raise socket.error(errnum) - except SSL.Error, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - thirdarg = None - try: - thirdarg = e.args[0][0][2] - except IndexError: - pass - - if thirdarg == 'http request': - # The client is talking HTTP to an HTTPS server. - raise NoSSLError() - raise FatalSSLAlert(*e.args) - except: - raise - - if time.time() - start > self.ssl_timeout: - raise socket.timeout("timed out") - - def recv(self, *args, **kwargs): - buf = [] - r = super(SSL_fileobject, self).recv - while True: - data = self._safe_call(True, r, *args, **kwargs) - buf.append(data) - p = self._sock.pending() - if not p: - return "".join(buf) - - def sendall(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).sendall, *args, **kwargs) - - def send(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).send, *args, **kwargs) - - -class HTTPConnection(object): - """An HTTP connection (active socket). - - socket: the raw socket object (usually TCP) for this connection. - wsgi_app: the WSGI application for this server/connection. - environ: a WSGI environ template. This will be copied for each request. - - rfile: a fileobject for reading from the socket. - send: a function for writing (+ flush) to the socket. - """ - - rbufsize = -1 - RequestHandlerClass = HTTPRequest - environ = {"wsgi.version": (1, 0), - "wsgi.url_scheme": "http", - "wsgi.multithread": True, - "wsgi.multiprocess": False, - "wsgi.run_once": False, - "wsgi.errors": sys.stderr, - } - - def __init__(self, sock, wsgi_app, environ): - self.socket = sock - self.wsgi_app = wsgi_app - - # Copy the class environ into self. - self.environ = self.environ.copy() - self.environ.update(environ) - - if SSL and isinstance(sock, SSL.ConnectionType): - timeout = sock.gettimeout() - self.rfile = SSL_fileobject(sock, "rb", self.rbufsize) - self.rfile.ssl_timeout = timeout - self.wfile = SSL_fileobject(sock, "wb", -1) - self.wfile.ssl_timeout = timeout - else: - self.rfile = CP_fileobject(sock, "rb", self.rbufsize) - self.wfile = CP_fileobject(sock, "wb", -1) - - # Wrap wsgi.input but not HTTPConnection.rfile itself. - # We're also not setting maxlen yet; we'll do that separately - # for headers and body for each iteration of self.communicate - # (if maxlen is 0 the wrapper doesn't check length). - self.environ["wsgi.input"] = SizeCheckWrapper(self.rfile, 0) - - def communicate(self): - """Read each request and respond appropriately.""" - try: - while True: - # (re)set req to None so that if something goes wrong in - # the RequestHandlerClass constructor, the error doesn't - # get written to the previous request. - req = None - req = self.RequestHandlerClass(self.wfile, self.environ, - self.wsgi_app) - - # This order of operations should guarantee correct pipelining. - req.parse_request() - if not req.ready: - return - - req.respond() - if req.close_connection: - return - - except socket.error, e: - errnum = e.args[0] - if errnum == 'timed out': - if req and not req.sent_headers: - req.simple_response("408 Request Timeout") - elif errnum not in socket_errors_to_ignore: - if req and not req.sent_headers: - req.simple_response("500 Internal Server Error", - format_exc()) - return - except (KeyboardInterrupt, SystemExit): - raise - except FatalSSLAlert, e: - # Close the connection. - return - except NoSSLError: - if req and not req.sent_headers: - # Unwrap our wfile - req.wfile = CP_fileobject(self.socket._sock, "wb", -1) - req.simple_response("400 Bad Request", - "The client sent a plain HTTP request, but " - "this server only speaks HTTPS on this port.") - self.linger = True - except Exception, e: - if req and not req.sent_headers: - req.simple_response("500 Internal Server Error", format_exc()) - - linger = False - - def close(self): - """Close the socket underlying this connection.""" - self.rfile.close() - - if not self.linger: - # Python's socket module does NOT call close on the kernel socket - # when you call socket.close(). We do so manually here because we - # want this server to send a FIN TCP segment immediately. Note this - # must be called *before* calling socket.close(), because the latter - # drops its reference to the kernel socket. - self.socket._sock.close() - self.socket.close() - else: - # On the other hand, sometimes we want to hang around for a bit - # to make sure the client has a chance to read our entire - # response. Skipping the close() calls here delays the FIN - # packet until the socket object is garbage-collected later. - # Someday, perhaps, we'll do the full lingering_close that - # Apache does, but not today. - pass - - -def format_exc(limit=None): - """Like print_exc() but return a string. Backport for Python 2.3.""" - try: - etype, value, tb = sys.exc_info() - return ''.join(traceback.format_exception(etype, value, tb, limit)) - finally: - etype = value = tb = None - - -_SHUTDOWNREQUEST = None - -class WorkerThread(threading.Thread): - """Thread which continuously polls a Queue for Connection objects. - - server: the HTTP Server which spawned this thread, and which owns the - Queue and is placing active connections into it. - ready: a simple flag for the calling server to know when this thread - has begun polling the Queue. - - Due to the timing issues of polling a Queue, a WorkerThread does not - check its own 'ready' flag after it has started. To stop the thread, - it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue - (one for each running WorkerThread). - """ - - conn = None - - def __init__(self, server): - self.ready = False - self.server = server - threading.Thread.__init__(self) - - def run(self): - try: - self.ready = True - while True: - conn = self.server.requests.get() - if conn is _SHUTDOWNREQUEST: - return - - self.conn = conn - try: - conn.communicate() - finally: - conn.close() - self.conn = None - except (KeyboardInterrupt, SystemExit), exc: - self.server.interrupt = exc - - -class ThreadPool(object): - """A Request Queue for the CherryPyWSGIServer which pools threads. - - ThreadPool objects must provide min, get(), put(obj), start() - and stop(timeout) attributes. - """ - - def __init__(self, server, min=10, max=-1): - self.server = server - self.min = min - self.max = max - self._threads = [] - self._queue = Queue.Queue() - self.get = self._queue.get - - def start(self): - """Start the pool of threads.""" - for i in xrange(self.min): - self._threads.append(WorkerThread(self.server)) - for worker in self._threads: - worker.setName("CP WSGIServer " + worker.getName()) - worker.start() - for worker in self._threads: - while not worker.ready: - time.sleep(.1) - - def _get_idle(self): - """Number of worker threads which are idle. Read-only.""" - return len([t for t in self._threads if t.conn is None]) - idle = property(_get_idle, doc=_get_idle.__doc__) - - def put(self, obj): - self._queue.put(obj) - if obj is _SHUTDOWNREQUEST: - return - - def grow(self, amount): - """Spawn new worker threads (not above self.max).""" - for i in xrange(amount): - if self.max > 0 and len(self._threads) >= self.max: - break - worker = WorkerThread(self.server) - worker.setName("CP WSGIServer " + worker.getName()) - self._threads.append(worker) - worker.start() - - def shrink(self, amount): - """Kill off worker threads (not below self.min).""" - # Grow/shrink the pool if necessary. - # Remove any dead threads from our list - for t in self._threads: - if not t.isAlive(): - self._threads.remove(t) - amount -= 1 - - if amount > 0: - for i in xrange(min(amount, len(self._threads) - self.min)): - # Put a number of shutdown requests on the queue equal - # to 'amount'. Once each of those is processed by a worker, - # that worker will terminate and be culled from our list - # in self.put. - self._queue.put(_SHUTDOWNREQUEST) - - def stop(self, timeout=5): - # Must shut down threads here so the code that calls - # this method can know when all threads are stopped. - for worker in self._threads: - self._queue.put(_SHUTDOWNREQUEST) - - # Don't join currentThread (when stop is called inside a request). - current = threading.currentThread() - while self._threads: - worker = self._threads.pop() - if worker is not current and worker.isAlive(): - try: - if timeout is None or timeout < 0: - worker.join() - else: - worker.join(timeout) - if worker.isAlive(): - # We exhausted the timeout. - # Forcibly shut down the socket. - c = worker.conn - if c and not c.rfile.closed: - if SSL and isinstance(c.socket, SSL.ConnectionType): - # pyOpenSSL.socket.shutdown takes no args - c.socket.shutdown() - else: - c.socket.shutdown(socket.SHUT_RD) - worker.join() - except (AssertionError, - # Ignore repeated Ctrl-C. - # See http://www.cherrypy.org/ticket/691. - KeyboardInterrupt), exc1: - pass - - - -class SSLConnection: - """A thread-safe wrapper for an SSL.Connection. - - *args: the arguments to create the wrapped SSL.Connection(*args). - """ - - def __init__(self, *args): - self._ssl_conn = SSL.Connection(*args) - self._lock = threading.RLock() - - for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', - 'renegotiate', 'bind', 'listen', 'connect', 'accept', - 'setblocking', 'fileno', 'shutdown', 'close', 'get_cipher_list', - 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', - 'makefile', 'get_app_data', 'set_app_data', 'state_string', - 'sock_shutdown', 'get_peer_certificate', 'want_read', - 'want_write', 'set_connect_state', 'set_accept_state', - 'connect_ex', 'sendall', 'settimeout'): - exec """def %s(self, *args): - self._lock.acquire() - try: - return self._ssl_conn.%s(*args) - finally: - self._lock.release() -""" % (f, f) - - -try: - import fcntl -except ImportError: - try: - from ctypes import windll, WinError - except ImportError: - def prevent_socket_inheritance(sock): - """Dummy function, since neither fcntl nor ctypes are available.""" - pass - else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (Windows).""" - if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): - raise WinError() -else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (POSIX).""" - fd = sock.fileno() - old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) - fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) - - -class CherryPyWSGIServer(object): - """An HTTP server for WSGI. - - bind_addr: The interface on which to listen for connections. - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string. - wsgi_app: the WSGI 'application callable'; multiple WSGI applications - may be passed as (path_prefix, app) pairs. - numthreads: the number of worker threads to create (default 10). - server_name: the string to set for WSGI's SERVER_NAME environ entry. - Defaults to socket.gethostname(). - max: the maximum number of queued requests (defaults to -1 = no limit). - request_queue_size: the 'backlog' argument to socket.listen(); - specifies the maximum number of queued connections (default 5). - timeout: the timeout in seconds for accepted connections (default 10). - - nodelay: if True (the default since 3.1), sets the TCP_NODELAY socket - option. - - protocol: the version string to write in the Status-Line of all - HTTP responses. For example, "HTTP/1.1" (the default). This - also limits the supported features used in the response. - - - SSL/HTTPS - --------- - The OpenSSL module must be importable for SSL functionality. - You can obtain it from http://pyopenssl.sourceforge.net/ - - ssl_certificate: the filename of the server SSL certificate. - ssl_privatekey: the filename of the server's private key file. - - If either of these is None (both are None by default), this server - will not use SSL. If both are given and are valid, they will be read - on server start and used in the SSL context for the listening socket. - """ - - protocol = "HTTP/1.1" - _bind_addr = "127.0.0.1" - version = "CherryPy/3.1.2" - ready = False - _interrupt = None - - nodelay = True - - ConnectionClass = HTTPConnection - environ = {} - - # Paths to certificate and private key files - ssl_certificate = None - ssl_private_key = None - - def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, - max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): - self.requests = ThreadPool(self, min=numthreads or 1, max=max) - - if callable(wsgi_app): - # We've been handed a single wsgi_app, in CP-2.1 style. - # Assume it's mounted at "". - self.wsgi_app = wsgi_app - else: - # We've been handed a list of (path_prefix, wsgi_app) tuples, - # so that the server can call different wsgi_apps, and also - # correctly set SCRIPT_NAME. - warnings.warn("The ability to pass multiple apps is deprecated " - "and will be removed in 3.2. You should explicitly " - "include a WSGIPathInfoDispatcher instead.", - DeprecationWarning) - self.wsgi_app = WSGIPathInfoDispatcher(wsgi_app) - - self.bind_addr = bind_addr - if not server_name: - server_name = socket.gethostname() - self.server_name = server_name - self.request_queue_size = request_queue_size - - self.timeout = timeout - self.shutdown_timeout = shutdown_timeout - - def _get_numthreads(self): - return self.requests.min - def _set_numthreads(self, value): - self.requests.min = value - numthreads = property(_get_numthreads, _set_numthreads) - - def __str__(self): - return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, - self.bind_addr) - - def _get_bind_addr(self): - return self._bind_addr - def _set_bind_addr(self, value): - if isinstance(value, tuple) and value[0] in ('', None): - # Despite the socket module docs, using '' does not - # allow AI_PASSIVE to work. Passing None instead - # returns '0.0.0.0' like we want. In other words: - # host AI_PASSIVE result - # '' Y 192.168.x.y - # '' N 192.168.x.y - # None Y 0.0.0.0 - # None N 127.0.0.1 - # But since you can get the same effect with an explicit - # '0.0.0.0', we deny both the empty string and None as values. - raise ValueError("Host values of '' or None are not allowed. " - "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " - "to listen on all active interfaces.") - self._bind_addr = value - bind_addr = property(_get_bind_addr, _set_bind_addr, - doc="""The interface on which to listen for connections. - - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string.""") - - def start(self): - """Run the server forever.""" - # We don't have to trap KeyboardInterrupt or SystemExit here, - # because cherrpy.server already does so, calling self.stop() for us. - # If you're using this server with another framework, you should - # trap those exceptions in whatever code block calls start(). - self._interrupt = None - - # Select the appropriate socket - if isinstance(self.bind_addr, basestring): - # AF_UNIX socket - - # So we can reuse the socket... - try: os.unlink(self.bind_addr) - except: pass - - # So everyone can access the socket... - try: os.chmod(self.bind_addr, 0777) - except: pass - - info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] - else: - # AF_INET or AF_INET6 socket - # Get the correct address family for our host (allows IPv6 addresses) - host, port = self.bind_addr - try: - info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM, 0, socket.AI_PASSIVE) - except socket.gaierror: - # Probably a DNS issue. Assume IPv4. - info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", self.bind_addr)] - - self.socket = None - msg = "No socket could be created" - for res in info: - af, socktype, proto, canonname, sa = res - try: - self.bind(af, socktype, proto) - except socket.error, msg: - if self.socket: - self.socket.close() - self.socket = None - continue - break - if not self.socket: - raise socket.error, msg - - # Timeout so KeyboardInterrupt can be caught on Win32 - self.socket.settimeout(1) - self.socket.listen(self.request_queue_size) - - # Create worker threads - self.requests.start() - - self.ready = True - while self.ready: - self.tick() - if self.interrupt: - while self.interrupt is True: - # Wait for self.stop() to complete. See _set_interrupt. - time.sleep(0.1) - if self.interrupt: - raise self.interrupt - - def bind(self, family, type, proto=0): - """Create (or recreate) the actual socket object.""" - self.socket = socket.socket(family, type, proto) - prevent_socket_inheritance(self.socket) - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - if self.nodelay: - self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - if self.ssl_certificate and self.ssl_private_key: - if SSL is None: - raise ImportError("You must install pyOpenSSL to use HTTPS.") - - # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 - ctx = SSL.Context(SSL.SSLv23_METHOD) - ctx.use_privatekey_file(self.ssl_private_key) - ctx.use_certificate_file(self.ssl_certificate) - self.socket = SSLConnection(ctx, self.socket) - self.populate_ssl_environ() - - # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), - # activate dual-stack. See http://www.cherrypy.org/ticket/871. - if (not isinstance(self.bind_addr, basestring) - and self.bind_addr[0] == '::' and family == socket.AF_INET6): - try: - self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) - except (AttributeError, socket.error): - # Apparently, the socket option is not available in - # this machine's TCP stack - pass - - self.socket.bind(self.bind_addr) - - def tick(self): - """Accept a new connection and put it on the Queue.""" - try: - s, addr = self.socket.accept() - prevent_socket_inheritance(s) - if not self.ready: - return - if hasattr(s, 'settimeout'): - s.settimeout(self.timeout) - - environ = self.environ.copy() - # SERVER_SOFTWARE is common for IIS. It's also helpful for - # us to pass a default value for the "Server" response header. - if environ.get("SERVER_SOFTWARE") is None: - environ["SERVER_SOFTWARE"] = "%s WSGI Server" % self.version - # set a non-standard environ entry so the WSGI app can know what - # the *real* server protocol is (and what features to support). - # See http://www.faqs.org/rfcs/rfc2145.html. - environ["ACTUAL_SERVER_PROTOCOL"] = self.protocol - environ["SERVER_NAME"] = self.server_name - - if isinstance(self.bind_addr, basestring): - # AF_UNIX. This isn't really allowed by WSGI, which doesn't - # address unix domain sockets. But it's better than nothing. - environ["SERVER_PORT"] = "" - else: - environ["SERVER_PORT"] = str(self.bind_addr[1]) - # optional values - # Until we do DNS lookups, omit REMOTE_HOST - environ["REMOTE_ADDR"] = addr[0] - environ["REMOTE_PORT"] = str(addr[1]) - - conn = self.ConnectionClass(s, self.wsgi_app, environ) - self.requests.put(conn) - except socket.timeout: - # The only reason for the timeout in start() is so we can - # notice keyboard interrupts on Win32, which don't interrupt - # accept() by default - return - except socket.error, x: - if x.args[0] in socket_error_eintr: - # I *think* this is right. EINTR should occur when a signal - # is received during the accept() call; all docs say retry - # the call, and I *think* I'm reading it right that Python - # will then go ahead and poll for and handle the signal - # elsewhere. See http://www.cherrypy.org/ticket/707. - return - if x.args[0] in socket_errors_nonblocking: - # Just try again. See http://www.cherrypy.org/ticket/479. - return - if x.args[0] in socket_errors_to_ignore: - # Our socket was closed. - # See http://www.cherrypy.org/ticket/686. - return - raise - - def _get_interrupt(self): - return self._interrupt - def _set_interrupt(self, interrupt): - self._interrupt = True - self.stop() - self._interrupt = interrupt - interrupt = property(_get_interrupt, _set_interrupt, - doc="Set this to an Exception instance to " - "interrupt the server.") - - def stop(self): - """Gracefully shutdown a server that is serving forever.""" - self.ready = False - - sock = getattr(self, "socket", None) - if sock: - if not isinstance(self.bind_addr, basestring): - # Touch our own socket to make accept() return immediately. - try: - host, port = sock.getsockname()[:2] - except socket.error, x: - if x.args[0] not in socket_errors_to_ignore: - raise - else: - # Note that we're explicitly NOT using AI_PASSIVE, - # here, because we want an actual IP to touch. - # localhost won't work if we've bound to a public IP, - # but it will if we bound to '0.0.0.0' (INADDR_ANY). - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - s = None - try: - s = socket.socket(af, socktype, proto) - # See http://groups.google.com/group/cherrypy-users/ - # browse_frm/thread/bbfe5eb39c904fe0 - s.settimeout(1.0) - s.connect((host, port)) - s.close() - except socket.error: - if s: - s.close() - if hasattr(sock, "close"): - sock.close() - self.socket = None - - self.requests.stop(self.shutdown_timeout) - - def populate_ssl_environ(self): - """Create WSGI environ entries to be merged into each request.""" - cert = open(self.ssl_certificate, 'rb').read() - cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) - ssl_environ = { - "wsgi.url_scheme": "https", - "HTTPS": "on", - # pyOpenSSL doesn't provide access to any of these AFAICT -## 'SSL_PROTOCOL': 'SSLv2', -## SSL_CIPHER string The cipher specification name -## SSL_VERSION_INTERFACE string The mod_ssl program version -## SSL_VERSION_LIBRARY string The OpenSSL program version - } - - # Server certificate attributes - ssl_environ.update({ - 'SSL_SERVER_M_VERSION': cert.get_version(), - 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), -## 'SSL_SERVER_V_START': Validity of server's certificate (start time), -## 'SSL_SERVER_V_END': Validity of server's certificate (end time), - }) - - for prefix, dn in [("I", cert.get_issuer()), - ("S", cert.get_subject())]: - # X509Name objects don't seem to have a way to get the - # complete DN string. Use str() and slice it instead, - # because str(dn) == "" - dnstr = str(dn)[18:-2] - - wsgikey = 'SSL_SERVER_%s_DN' % prefix - ssl_environ[wsgikey] = dnstr - - # The DN should be of the form: /k1=v1/k2=v2, but we must allow - # for any value to contain slashes itself (in a URL). - while dnstr: - pos = dnstr.rfind("=") - dnstr, value = dnstr[:pos], dnstr[pos + 1:] - pos = dnstr.rfind("/") - dnstr, key = dnstr[:pos], dnstr[pos + 1:] - if key and value: - wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) - ssl_environ[wsgikey] = value - - self.environ.update(ssl_environ) - + # Le sigh. Boo for backward-incompatible syntax. + exec('from .wsgiserver3 import *') diff --git a/src/cherrypy/wsgiserver/ssl_builtin.py b/src/cherrypy/wsgiserver/ssl_builtin.py new file mode 100644 index 0000000000..03bf05deed --- /dev/null +++ b/src/cherrypy/wsgiserver/ssl_builtin.py @@ -0,0 +1,91 @@ +"""A library for integrating Python's builtin ``ssl`` library with CherryPy. + +The ssl module must be importable for SSL functionality. + +To use this module, set ``CherryPyWSGIServer.ssl_adapter`` to an instance of +``BuiltinSSLAdapter``. +""" + +try: + import ssl +except ImportError: + ssl = None + +try: + from _pyio import DEFAULT_BUFFER_SIZE +except ImportError: + try: + from io import DEFAULT_BUFFER_SIZE + except ImportError: + DEFAULT_BUFFER_SIZE = -1 + +import sys + +from cherrypy import wsgiserver + + +class BuiltinSSLAdapter(wsgiserver.SSLAdapter): + """A wrapper for integrating Python's builtin ssl module with CherryPy.""" + + certificate = None + """The filename of the server SSL certificate.""" + + private_key = None + """The filename of the server's private key file.""" + + def __init__(self, certificate, private_key, certificate_chain=None): + if ssl is None: + raise ImportError("You must install the ssl module to use HTTPS.") + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def bind(self, sock): + """Wrap and return the given socket.""" + return sock + + def wrap(self, sock): + """Wrap and return the given socket, plus WSGI environ entries.""" + try: + s = ssl.wrap_socket(sock, do_handshake_on_connect=True, + server_side=True, certfile=self.certificate, + keyfile=self.private_key, ssl_version=ssl.PROTOCOL_SSLv23) + except ssl.SSLError: + e = sys.exc_info()[1] + if e.errno == ssl.SSL_ERROR_EOF: + # This is almost certainly due to the cherrypy engine + # 'pinging' the socket to assert it's connectable; + # the 'ping' isn't SSL. + return None, {} + elif e.errno == ssl.SSL_ERROR_SSL: + if e.args[1].endswith('http request'): + # The client is speaking HTTP to an HTTPS server. + raise wsgiserver.NoSSLError + elif e.args[1].endswith('unknown protocol'): + # The client is speaking some non-HTTP protocol. + # Drop the conn. + return None, {} + raise + return s, self.get_environ(s) + + # TODO: fill this out more with mod ssl env + def get_environ(self, sock): + """Create WSGI environ entries to be merged into each request.""" + cipher = sock.cipher() + ssl_environ = { + "wsgi.url_scheme": "https", + "HTTPS": "on", + 'SSL_PROTOCOL': cipher[1], + 'SSL_CIPHER': cipher[0] +## SSL_VERSION_INTERFACE string The mod_ssl program version +## SSL_VERSION_LIBRARY string The OpenSSL program version + } + return ssl_environ + + if sys.version_info >= (3, 0): + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_makefile(sock, mode, bufsize) + else: + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_fileobject(sock, mode, bufsize) + diff --git a/src/cherrypy/wsgiserver/ssl_pyopenssl.py b/src/cherrypy/wsgiserver/ssl_pyopenssl.py new file mode 100644 index 0000000000..f3d9bf54b8 --- /dev/null +++ b/src/cherrypy/wsgiserver/ssl_pyopenssl.py @@ -0,0 +1,256 @@ +"""A library for integrating pyOpenSSL with CherryPy. + +The OpenSSL module must be importable for SSL functionality. +You can obtain it from http://pyopenssl.sourceforge.net/ + +To use this module, set CherryPyWSGIServer.ssl_adapter to an instance of +SSLAdapter. There are two ways to use SSL: + +Method One +---------- + + * ``ssl_adapter.context``: an instance of SSL.Context. + +If this is not None, it is assumed to be an SSL.Context instance, +and will be passed to SSL.Connection on bind(). The developer is +responsible for forming a valid Context object. This approach is +to be preferred for more flexibility, e.g. if the cert and key are +streams instead of files, or need decryption, or SSL.SSLv3_METHOD +is desired instead of the default SSL.SSLv23_METHOD, etc. Consult +the pyOpenSSL documentation for complete options. + +Method Two (shortcut) +--------------------- + + * ``ssl_adapter.certificate``: the filename of the server SSL certificate. + * ``ssl_adapter.private_key``: the filename of the server's private key file. + +Both are None by default. If ssl_adapter.context is None, but .private_key +and .certificate are both given and valid, they will be read, and the +context will be automatically created from them. +""" + +import socket +import threading +import time + +from cherrypy import wsgiserver + +try: + from OpenSSL import SSL + from OpenSSL import crypto +except ImportError: + SSL = None + + +class SSL_fileobject(wsgiserver.CP_fileobject): + """SSL file object attached to a socket object.""" + + ssl_timeout = 3 + ssl_retry = .01 + + def _safe_call(self, is_reader, call, *args, **kwargs): + """Wrap the given call with SSL error-trapping. + + is_reader: if False EOF errors will be raised. If True, EOF errors + will return "" (to emulate normal sockets). + """ + start = time.time() + while True: + try: + return call(*args, **kwargs) + except SSL.WantReadError: + # Sleep and try again. This is dangerous, because it means + # the rest of the stack has no way of differentiating + # between a "new handshake" error and "client dropped". + # Note this isn't an endless loop: there's a timeout below. + time.sleep(self.ssl_retry) + except SSL.WantWriteError: + time.sleep(self.ssl_retry) + except SSL.SysCallError, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + errnum = e.args[0] + if is_reader and errnum in wsgiserver.socket_errors_to_ignore: + return "" + raise socket.error(errnum) + except SSL.Error, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + thirdarg = None + try: + thirdarg = e.args[0][0][2] + except IndexError: + pass + + if thirdarg == 'http request': + # The client is talking HTTP to an HTTPS server. + raise wsgiserver.NoSSLError() + + raise wsgiserver.FatalSSLAlert(*e.args) + except: + raise + + if time.time() - start > self.ssl_timeout: + raise socket.timeout("timed out") + + def recv(self, *args, **kwargs): + buf = [] + r = super(SSL_fileobject, self).recv + while True: + data = self._safe_call(True, r, *args, **kwargs) + buf.append(data) + p = self._sock.pending() + if not p: + return "".join(buf) + + def sendall(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).sendall, + *args, **kwargs) + + def send(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).send, + *args, **kwargs) + + +class SSLConnection: + """A thread-safe wrapper for an SSL.Connection. + + ``*args``: the arguments to create the wrapped ``SSL.Connection(*args)``. + """ + + def __init__(self, *args): + self._ssl_conn = SSL.Connection(*args) + self._lock = threading.RLock() + + for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', + 'renegotiate', 'bind', 'listen', 'connect', 'accept', + 'setblocking', 'fileno', 'close', 'get_cipher_list', + 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', + 'makefile', 'get_app_data', 'set_app_data', 'state_string', + 'sock_shutdown', 'get_peer_certificate', 'want_read', + 'want_write', 'set_connect_state', 'set_accept_state', + 'connect_ex', 'sendall', 'settimeout', 'gettimeout'): + exec("""def %s(self, *args): + self._lock.acquire() + try: + return self._ssl_conn.%s(*args) + finally: + self._lock.release() +""" % (f, f)) + + def shutdown(self, *args): + self._lock.acquire() + try: + # pyOpenSSL.socket.shutdown takes no args + return self._ssl_conn.shutdown() + finally: + self._lock.release() + + +class pyOpenSSLAdapter(wsgiserver.SSLAdapter): + """A wrapper for integrating pyOpenSSL with CherryPy.""" + + context = None + """An instance of SSL.Context.""" + + certificate = None + """The filename of the server SSL certificate.""" + + private_key = None + """The filename of the server's private key file.""" + + certificate_chain = None + """Optional. The filename of CA's intermediate certificate bundle. + + This is needed for cheaper "chained root" SSL certificates, and should be + left as None if not required.""" + + def __init__(self, certificate, private_key, certificate_chain=None): + if SSL is None: + raise ImportError("You must install pyOpenSSL to use HTTPS.") + + self.context = None + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + self._environ = None + + def bind(self, sock): + """Wrap and return the given socket.""" + if self.context is None: + self.context = self.get_context() + conn = SSLConnection(self.context, sock) + self._environ = self.get_environ() + return conn + + def wrap(self, sock): + """Wrap and return the given socket, plus WSGI environ entries.""" + return sock, self._environ.copy() + + def get_context(self): + """Return an SSL.Context from self attributes.""" + # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 + c = SSL.Context(SSL.SSLv23_METHOD) + c.use_privatekey_file(self.private_key) + if self.certificate_chain: + c.load_verify_locations(self.certificate_chain) + c.use_certificate_file(self.certificate) + return c + + def get_environ(self): + """Return WSGI environ entries to be merged into each request.""" + ssl_environ = { + "HTTPS": "on", + # pyOpenSSL doesn't provide access to any of these AFAICT +## 'SSL_PROTOCOL': 'SSLv2', +## SSL_CIPHER string The cipher specification name +## SSL_VERSION_INTERFACE string The mod_ssl program version +## SSL_VERSION_LIBRARY string The OpenSSL program version + } + + if self.certificate: + # Server certificate attributes + cert = open(self.certificate, 'rb').read() + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) + ssl_environ.update({ + 'SSL_SERVER_M_VERSION': cert.get_version(), + 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), +## 'SSL_SERVER_V_START': Validity of server's certificate (start time), +## 'SSL_SERVER_V_END': Validity of server's certificate (end time), + }) + + for prefix, dn in [("I", cert.get_issuer()), + ("S", cert.get_subject())]: + # X509Name objects don't seem to have a way to get the + # complete DN string. Use str() and slice it instead, + # because str(dn) == "" + dnstr = str(dn)[18:-2] + + wsgikey = 'SSL_SERVER_%s_DN' % prefix + ssl_environ[wsgikey] = dnstr + + # The DN should be of the form: /k1=v1/k2=v2, but we must allow + # for any value to contain slashes itself (in a URL). + while dnstr: + pos = dnstr.rfind("=") + dnstr, value = dnstr[:pos], dnstr[pos + 1:] + pos = dnstr.rfind("/") + dnstr, key = dnstr[:pos], dnstr[pos + 1:] + if key and value: + wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) + ssl_environ[wsgikey] = value + + return ssl_environ + + def makefile(self, sock, mode='r', bufsize=-1): + if SSL and isinstance(sock, SSL.ConnectionType): + timeout = sock.gettimeout() + f = SSL_fileobject(sock, mode, bufsize) + f.ssl_timeout = timeout + return f + else: + return wsgiserver.CP_fileobject(sock, mode, bufsize) + diff --git a/src/cherrypy/wsgiserver/wsgiserver2.py b/src/cherrypy/wsgiserver/wsgiserver2.py new file mode 100644 index 0000000000..b6bd499718 --- /dev/null +++ b/src/cherrypy/wsgiserver/wsgiserver2.py @@ -0,0 +1,2322 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. + +Simplest example on how to use this module directly +(without using CherryPy's application machinery):: + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + server.start() + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher:: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. + +This won't call the CherryPy engine (application side) at all, only the +HTTP server, which is independent from the rest of CherryPy. Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue:: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop:: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" + req.rfile.readline() + read_headers(req.rfile, req.inheaders) + req.respond() + -> response = app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'CP_fileobject', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] + +import os +try: + import queue +except: + import Queue as queue +import re +import rfc822 +import socket +import sys +if 'win' in sys.platform and not hasattr(socket, 'IPPROTO_IPV6'): + socket.IPPROTO_IPV6 = 41 +try: + import cStringIO as StringIO +except ImportError: + import StringIO +DEFAULT_BUFFER_SIZE = -1 + +_fileobject_uses_str_type = isinstance(socket._fileobject(None)._rbuf, basestring) + +import threading +import time +import traceback +def format_exc(limit=None): + """Like print_exc() but return a string. Backport for Python 2.3.""" + try: + etype, value, tb = sys.exc_info() + return ''.join(traceback.format_exception(etype, value, tb, limit)) + finally: + etype = value = tb = None + + +from urllib import unquote +from urlparse import urlparse +import warnings + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. + """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", + ) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate']] + + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. + """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = ", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + +class SizeCheckWrapper(object): + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < 256 or data[-1:] == "\n": + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + """Wraps a file-like object, returning an empty string when exhausted. + + This class is intended to provide a conforming wsgi.input value for + request entities that have been encoded with the 'chunked' transfer + encoding. + """ + + def __init__(self, rfile, maxlen, bufsize=8192): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + self.buffer = EMPTY + self.bufsize = bufsize + self.closed = False + + def _fetch(self): + if self.closed: + return + + line = self.rfile.readline() + self.bytes_read += len(line) + + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) + + line = line.strip().split(SEMICOLON, 1) + + try: + chunk_size = line.pop(0) + chunk_size = int(chunk_size, 16) + except ValueError: + raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) + + if chunk_size <= 0: + self.closed = True + return + +## if line: chunk_extension = line[0] + + if self.maxlen and self.bytes_read + chunk_size > self.maxlen: + raise IOError("Request Entity Too Large") + + chunk = self.rfile.read(chunk_size) + self.bytes_read += len(chunk) + self.buffer += chunk + + crlf = self.rfile.read(2) + if crlf != CRLF: + raise ValueError( + "Bad chunked transfer coding (expected '\\r\\n', " + "got " + repr(crlf) + ")") + + def read(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + if size: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + data += self.buffer + + def readline(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + newline_pos = self.buffer.find(LF) + if size: + if newline_pos == -1: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + remaining = min(size - len(data), newline_pos) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + if newline_pos == -1: + data += self.buffer + else: + data += self.buffer[:newline_pos] + self.buffer = self.buffer[newline_pos:] + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def read_trailer_lines(self): + if not self.closed: + raise ValueError( + "Cannot read trailers until the request body has been read.") + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + self.bytes_read += len(line) + if self.maxlen and self.bytes_read > self.maxlen: + raise IOError("Request Entity Too Large") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + yield line + + def close(self): + self.rfile.close() + + def __iter__(self): + # Shamelessly stolen from StringIO + total = 0 + line = self.readline(sizehint) + while line: + yield line + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + + +class HTTPRequest(object): + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. + """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server= server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response("414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response("413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. + request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response("400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + rp = int(req_protocol[5]), int(req_protocol[7]) + except (ValueError, IndexError): + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [unquote(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = "%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + sp = int(self.server.protocol[5]), int(self.server.protocol[7]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get("Content-Length", 0)) > mrbs: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get("Connection", "") == "close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get("Connection", "") != "Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get("Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == "chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get("Expect", "") == "100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See http://www.cherrypy.org/ticket/951 + msg = self.server.protocol + " 100 Continue\r\n\r\n" + try: + self.conn.wfile.sendall(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + i = uri.find('://') + if i > 0 and QUESTION_MARK not in uri[:i]: + # An absoluteURI. + # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] + scheme, remainder = uri[:i].lower(), uri[i + 3:] + authority, path = remainder.split(FORWARD_SLASH, 1) + path = FORWARD_SLASH + path + return scheme, authority, path + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get("Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.sendall("0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [self.server.protocol + SPACE + + status + CRLF, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append("Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [hex(len(chunk))[2:], CRLF, chunk, CRLF] + self.conn.wfile.sendall(EMPTY.join(buf)) + else: + self.conn.wfile.sendall(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif "content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. + if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != 'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append(("Transfer-Encoding", "chunked")) + else: + # Closing the conn is the only way to determine len. + self.close_connection = True + + if "connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append(("Connection", "close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append(("Connection", "Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if "date" not in hkeys: + self.outheaders.append(("Date", rfc822.formatdate())) + + if "server" not in hkeys: + self.outheaders.append(("Server", self.server.server_name)) + + buf = [self.server.protocol + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.sendall(EMPTY.join(buf)) + + +class NoSSLError(Exception): + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_fileobject(socket._fileobject): + """Faux file object attached to a socket object.""" + + def __init__(self, *args, **kwargs): + self.bytes_read = 0 + self.bytes_written = 0 + socket._fileobject.__init__(self, *args, **kwargs) + + def sendall(self, data): + """Sendall for non-blocking sockets.""" + while data: + try: + bytes_sent = self.send(data) + data = data[bytes_sent:] + except socket.error, e: + if e.args[0] not in socket_errors_nonblocking: + raise + + def send(self, data): + bytes_sent = self._sock.send(data) + self.bytes_written += bytes_sent + return bytes_sent + + def flush(self): + if self._wbuf: + buffer = "".join(self._wbuf) + self._wbuf = [] + self.sendall(buffer) + + def recv(self, size): + while True: + try: + data = self._sock.recv(size) + self.bytes_read += len(data) + return data + except socket.error, e: + if (e.args[0] not in socket_errors_nonblocking + and e.args[0] not in socket_error_eintr): + raise + + if not _fileobject_uses_str_type: + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + data = self.recv(rbufsize) + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + data = self.recv(left) + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self.recv + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + data = self.recv(self._rbufsize) + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + data = self.recv(self._rbufsize) + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). + return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + else: + def read(self, size=-1): + if size < 0: + # Read until EOF + buffers = [self._rbuf] + self._rbuf = "" + if self._rbufsize <= 1: + recv_size = self.default_bufsize + else: + recv_size = self._rbufsize + + while True: + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + return "".join(buffers) + else: + # Read until size bytes or EOF seen, whichever comes first + data = self._rbuf + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + left = size - buf_len + recv_size = max(self._rbufsize, left) + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + def readline(self, size=-1): + data = self._rbuf + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + assert data == "" + buffers = [] + while data != "\n": + data = self.recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + return "".join(buffers) + else: + # Read until size bytes or \n or EOF seen, whichever comes first + nl = data.find('\n', 0, size) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + left = size - buf_len + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + +class HTTPConnection(object): + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. + """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_fileobject): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. + req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if errnum == 'timed out' or errnum == 'The read operation timed out': + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See http://www.cherrypy.org/ticket/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_fileobject(self.socket._sock, "wb", self.wbufsize) + req.simple_response("400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel socket + # when you call socket.close(). We do so manually here because we + # want this server to send a FIN TCP segment immediately. Note this + # must be called *before* calling socket.close(), because the latter + # drops its reference to the kernel socket. + if hasattr(self.socket, '_sock'): + self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + """An object which equals and does math like the integer '0' but evals True.""" + def __add__(self, other): + return other + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + +class WorkerThread(threading.Thread): + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). + """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ((self.start_time is None) and trueyzero or self.conn.requests_seen), + 'Bytes Read': lambda s: self.bytes_read + ((self.start_time is None) and trueyzero or self.conn.rfile.bytes_read), + 'Bytes Written': lambda s: self.bytes_written + ((self.start_time is None) and trueyzero or self.conn.wfile.bytes_written), + 'Work Time': lambda s: self.work_time + ((self.start_time is None) and trueyzero or time.time() - self.start_time), + 'Read Throughput': lambda s: s['Bytes Read'](s) / (s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / (s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue() + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + for i in range(amount): + if self.max > 0 and len(self._threads) >= self.max: + break + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + self._threads.append(worker) + worker.start() + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. + # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + if amount > 0: + for i in range(min(amount, len(self._threads) - self.min)): + # Put a number of shutdown requests on the queue equal + # to 'amount'. Once each of those is processed by a worker, + # that worker will terminate and be culled from our list + # in self.put. + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See http://www.cherrypy.org/ticket/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + """Base class for SSL driver library adapters. + + Required methods: + + * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` + * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> socket file object`` + """ + + def __init__(self, certificate, private_key, certificate_chain=None): + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def wrap(self, sock): + raise NotImplemented + + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + raise NotImplemented + + +class HTTPServer(object): + """An HTTP server.""" + + _bind_addr = "127.0.0.1" + _interrupt = None + + gateway = None + """A Gateway instance.""" + + minthreads = None + """The minimum number of worker threads to create (default 10).""" + + maxthreads = None + """The maximum number of worker threads to create (default -1 = no limit).""" + + server_name = None + """The name of the server; defaults to socket.gethostname().""" + + protocol = "HTTP/1.1" + """The version string to write in the Status-Line of all HTTP responses. + + For example, "HTTP/1.1" is the default. This also limits the supported + features used in the response.""" + + request_queue_size = 5 + """The 'backlog' arg to socket.listen(); max queued connections (default 5).""" + + shutdown_timeout = 5 + """The total time, in seconds, to wait for worker threads to cleanly exit.""" + + timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + version = "CherryPy/3.2.2" + """A version string for the HTTPServer.""" + + software = None + """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. + + If None, this defaults to ``'%s Server' % self.version``.""" + + ready = False + """An internal flag which marks whether the socket is accepting connections.""" + + max_request_header_size = 0 + """The maximum size, in bytes, for request headers, or 0 for no limit.""" + + max_request_body_size = 0 + """The maximum size, in bytes, for request bodies, or 0 for no limit.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + ConnectionClass = HTTPConnection + """The class to use for handling HTTP connections.""" + + ssl_adapter = None + """An instance of SSLAdapter (or a subclass). + + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum([w['Requests'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Read'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Written'](w) for w + in s['Worker Threads'].values()], 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum([w['Work Time'](w) for w + in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property(_get_bind_addr, _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. + + For UNIX sockets, supply the filename as a string.""") + + def start(self): + """Run the server forever.""" + # We don't have to trap KeyboardInterrupt or SystemExit here, + # because cherrpy.server already does so, calling self.stop() for us. + # If you're using this server with another framework, you should + # trap those exceptions in whatever code block calls start(). + self._interrupt = None + + if self.software is None: + self.software = "%s Server" % self.version + + # SSL backward compatibility + if (self.ssl_adapter is None and + getattr(self, 'ssl_certificate', None) and + getattr(self, 'ssl_private_key', None)): + warnings.warn( + "SSL attributes are deprecated in CherryPy 3.2, and will " + "be removed in CherryPy 3.3. Use an ssl_adapter attribute " + "instead.", + DeprecationWarning + ) + try: + from cherrypy.wsgiserver.ssl_pyopenssl import pyOpenSSLAdapter + except ImportError: + pass + else: + self.ssl_adapter = pyOpenSSLAdapter( + self.ssl_certificate, self.ssl_private_key, + getattr(self, 'ssl_certificate_chain', None)) + + # Select the appropriate socket + if isinstance(self.bind_addr, basestring): + # AF_UNIX socket + + # So we can reuse the socket... + try: os.unlink(self.bind_addr) + except: pass + + # So everyone can access the socket... + try: os.chmod(self.bind_addr, 511) # 0777 + except: pass + + info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, socket.AI_PASSIVE) + except socket.gaierror: + if ':' in self.bind_addr[0]: + info = [(socket.AF_INET6, socket.SOCK_STREAM, + 0, "", self.bind_addr + (0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, + 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error: + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error(msg) + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + self._start_time = time.time() + while self.ready: + try: + self.tick() + except (KeyboardInterrupt, SystemExit): + raise + except: + self.error_log("Error in HTTPServer.tick", level=logging.ERROR, + traceback=True) + + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def error_log(self, msg="", level=20, traceback=False): + # Override this in subclasses as desired + sys.stderr.write(msg + '\n') + sys.stderr.flush() + if traceback: + tblines = format_exc() + sys.stderr.write(tblines) + sys.stderr.flush() + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + prevent_socket_inheritance(self.socket) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay and not isinstance(self.bind_addr, str): + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if self.ssl_adapter is not None: + self.socket = self.ssl_adapter.bind(self.socket) + + # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), + # activate dual-stack. See http://www.cherrypy.org/ticket/871. + if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_fileobject + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + self.requests.put(conn) + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See http://www.cherrypy.org/ticket/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See http://www.cherrypy.org/ticket/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See http://www.cherrypy.org/ticket/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. + try: + host, port = sock.getsockname()[:2] + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + # Changed to use error code and not message + # See http://www.cherrypy.org/ticket/860. + raise + else: + # Note that we're explicitly NOT using AI_PASSIVE, + # here, because we want an actual IP to touch. + # localhost won't work if we've bound to a public IP, + # but it will if we bound to '0.0.0.0' (INADDR_ANY). + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(1.0) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + if hasattr(sock, "close"): + sock.close() + self.socket = None + + self.requests.stop(self.shutdown_timeout) + + +class Gateway(object): + """A base class to interface HTTPServer with other systems, such as WSGI.""" + + def __init__(self, req): + self.req = req + + def respond(self): + """Process the current request. Must be overridden in a subclass.""" + raise NotImplemented + + +# These may either be wsgiserver.SSLAdapter subclasses or the string names +# of such classes (in which case they will be lazily loaded). +ssl_adapters = { + 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', + 'pyopenssl': 'cherrypy.wsgiserver.ssl_pyopenssl.pyOpenSSLAdapter', + } + +def get_ssl_adapter_class(name='pyopenssl'): + """Return an SSL adapter class for the given name.""" + adapter = ssl_adapters[name.lower()] + if isinstance(adapter, basestring): + last_dot = adapter.rfind(".") + attr_name = adapter[last_dot + 1:] + mod_path = adapter[:last_dot] + + try: + mod = sys.modules[mod_path] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(mod_path, globals(), locals(), ['']) + + # Let an AttributeError propagate outward. + try: + adapter = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + return adapter + +# -------------------------------- WSGI Stuff -------------------------------- # + + +class CherryPyWSGIServer(HTTPServer): + """A subclass of HTTPServer which calls a WSGI application.""" + + wsgi_version = (1, 0) + """The version of WSGI to produce.""" + + def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, + max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): + self.requests = ThreadPool(self, min=numthreads or 1, max=max) + self.wsgi_app = wsgi_app + self.gateway = wsgi_gateways[self.wsgi_version] + + self.bind_addr = bind_addr + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.request_queue_size = request_queue_size + + self.timeout = timeout + self.shutdown_timeout = shutdown_timeout + self.clear_stats() + + def _get_numthreads(self): + return self.requests.min + def _set_numthreads(self, value): + self.requests.min = value + numthreads = property(_get_numthreads, _set_numthreads) + + +class WSGIGateway(Gateway): + """A base class to interface HTTPServer with WSGI.""" + + def __init__(self, req): + self.req = req + self.started_response = False + self.env = self.get_environ() + self.remaining_bytes_out = None + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + raise NotImplemented + + def respond(self): + """Process the current request.""" + response = self.req.server.wsgi_app(self.env, self.start_response) + try: + for chunk in response: + # "The start_response callable must not actually transmit + # the response headers. Instead, it must store them for the + # server or gateway to transmit only after the first + # iteration of the application return value that yields + # a NON-EMPTY string, or upon the application's first + # invocation of the write() callable." (PEP 333) + if chunk: + if isinstance(chunk, unicodestr): + chunk = chunk.encode('ISO-8859-1') + self.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + + def start_response(self, status, headers, exc_info = None): + """WSGI callable to begin the HTTP response.""" + # "The application may call start_response more than once, + # if and only if the exc_info argument is provided." + if self.started_response and not exc_info: + raise AssertionError("WSGI start_response called a second " + "time with no exc_info.") + self.started_response = True + + # "if exc_info is provided, and the HTTP headers have already been + # sent, start_response must raise an error, and should raise the + # exc_info tuple." + if self.req.sent_headers: + try: + raise exc_info[0], exc_info[1], exc_info[2] + finally: + exc_info = None + + self.req.status = status + for k, v in headers: + if not isinstance(k, str): + raise TypeError("WSGI response header key %r is not of type str." % k) + if not isinstance(v, str): + raise TypeError("WSGI response header value %r is not of type str." % v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.extend(headers) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). + """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response("500 Internal Server Error", + "The requested resource returned more bytes than the " + "declared Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path, + 'QUERY_STRING': req.qs, + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method, + 'REQUEST_URI': req.uri, + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol, + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme, + 'wsgi.version': (1, 0), + } + + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.iteritems(): + env["HTTP_" + k.upper().replace("-", "_")] = v + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys and values + in both Python 2 and Python 3. + """ + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env_10 = WSGIGateway_10.get_environ(self) + env = dict([(k.decode('ISO-8859-1'), v) for k, v in env_10.iteritems()]) + env[u'wsgi.version'] = ('u', 0) + + # Request-URI + env.setdefault(u'wsgi.url_encoding', u'utf-8') + try: + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + except UnicodeDecodeError: + # Fall back to latin 1 so apps can transcode if needed. + env[u'wsgi.url_encoding'] = u'ISO-8859-1' + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + + for k, v in sorted(env.items()): + if isinstance(v, str) and k not in ('REQUEST_URI', 'wsgi.input'): + env[k] = v.decode('ISO-8859-1') + + return env + +wsgi_gateways = { + (1, 0): WSGIGateway_10, + ('u', 0): WSGIGateway_u0, +} + +class WSGIPathInfoDispatcher(object): + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = list(apps.items()) + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort(cmp=lambda x,y: cmp(len(x[0]), len(y[0]))) + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". + self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] + diff --git a/src/cherrypy/wsgiserver/wsgiserver3.py b/src/cherrypy/wsgiserver/wsgiserver3.py new file mode 100644 index 0000000000..62db5ffd3b --- /dev/null +++ b/src/cherrypy/wsgiserver/wsgiserver3.py @@ -0,0 +1,2040 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. + +Simplest example on how to use this module directly +(without using CherryPy's application machinery):: + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + server.start() + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher:: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. + +This won't call the CherryPy engine (application side) at all, only the +HTTP server, which is independent from the rest of CherryPy. Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue:: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop:: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" + req.rfile.readline() + read_headers(req.rfile, req.inheaders) + req.respond() + -> response = app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'CP_makefile', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] + +import os +try: + import queue +except: + import Queue as queue +import re +import email.utils +import socket +import sys +if 'win' in sys.platform and not hasattr(socket, 'IPPROTO_IPV6'): + socket.IPPROTO_IPV6 = 41 +if sys.version_info < (3,1): + import io +else: + import _pyio as io +DEFAULT_BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE + +import threading +import time +from traceback import format_exc +from urllib.parse import unquote +from urllib.parse import urlparse +from urllib.parse import scheme_chars +import warnings + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. + """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", + ) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate']] + + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. + """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = b", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + +class SizeCheckWrapper(object): + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < 256 or data[-1:] == "\n": + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + """Wraps a file-like object, returning an empty string when exhausted. + + This class is intended to provide a conforming wsgi.input value for + request entities that have been encoded with the 'chunked' transfer + encoding. + """ + + def __init__(self, rfile, maxlen, bufsize=8192): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + self.buffer = EMPTY + self.bufsize = bufsize + self.closed = False + + def _fetch(self): + if self.closed: + return + + line = self.rfile.readline() + self.bytes_read += len(line) + + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) + + line = line.strip().split(SEMICOLON, 1) + + try: + chunk_size = line.pop(0) + chunk_size = int(chunk_size, 16) + except ValueError: + raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) + + if chunk_size <= 0: + self.closed = True + return + +## if line: chunk_extension = line[0] + + if self.maxlen and self.bytes_read + chunk_size > self.maxlen: + raise IOError("Request Entity Too Large") + + chunk = self.rfile.read(chunk_size) + self.bytes_read += len(chunk) + self.buffer += chunk + + crlf = self.rfile.read(2) + if crlf != CRLF: + raise ValueError( + "Bad chunked transfer coding (expected '\\r\\n', " + "got " + repr(crlf) + ")") + + def read(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + if size: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + data += self.buffer + + def readline(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + newline_pos = self.buffer.find(LF) + if size: + if newline_pos == -1: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + remaining = min(size - len(data), newline_pos) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + if newline_pos == -1: + data += self.buffer + else: + data += self.buffer[:newline_pos] + self.buffer = self.buffer[newline_pos:] + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def read_trailer_lines(self): + if not self.closed: + raise ValueError( + "Cannot read trailers until the request body has been read.") + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + self.bytes_read += len(line) + if self.maxlen and self.bytes_read > self.maxlen: + raise IOError("Request Entity Too Large") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + yield line + + def close(self): + self.rfile.close() + + def __iter__(self): + # Shamelessly stolen from StringIO + total = 0 + line = self.readline(sizehint) + while line: + yield line + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + + +class HTTPRequest(object): + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. + """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server= server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response("414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response("413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. + request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response("400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + # The [x:y] slicing is necessary for byte strings to avoid getting ord's + rp = int(req_protocol[5:6]), int(req_protocol[7:8]) + except ValueError: + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [self.unquote_bytes(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = b"%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + # The [x:y] slicing is necessary for byte strings to avoid getting ord's + sp = int(self.server.protocol[5:6]), int(self.server.protocol[7:8]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get(b"Content-Length", 0)) > mrbs: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get(b"Connection", b"") == b"close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get(b"Connection", b"") != b"Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get(b"Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(b",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == b"chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get(b"Expect", b"") == b"100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See http://www.cherrypy.org/ticket/951 + msg = self.server.protocol.encode('ascii') + b" 100 Continue\r\n\r\n" + try: + self.conn.wfile.write(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + scheme, sep, remainder = uri.partition(b'://') + if sep and QUESTION_MARK not in scheme: + # An absoluteURI. + # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] + authority, path_a, path_b = remainder.partition(FORWARD_SLASH) + return scheme.lower(), authority, path_a+path_b + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def unquote_bytes(self, path): + """takes quoted string and unquotes % encoded values""" + res = path.split(b'%') + + for i in range(1, len(res)): + item = res[i] + try: + res[i] = bytes([int(item[:2], 16)]) + item[2:] + except ValueError: + raise + return b''.join(res) + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get(b"Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.write(b"0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [bytes(self.server.protocol, "ascii") + SPACE + + bytes(status, "ISO-8859-1") + CRLF, + bytes("Content-Length: %s\r\n" % len(msg), "ISO-8859-1"), + b"Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append(b"Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.write(b"".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [bytes(hex(len(chunk)), 'ASCII')[2:], CRLF, chunk, CRLF] + self.conn.wfile.write(EMPTY.join(buf)) + else: + self.conn.wfile.write(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif b"content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. + if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != b'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append((b"Transfer-Encoding", b"chunked")) + else: + # Closing the conn is the only way to determine len. + self.close_connection = True + + if b"connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append((b"Connection", b"close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append((b"Connection", b"Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if b"date" not in hkeys: + self.outheaders.append( + (b"Date", email.utils.formatdate(usegmt=True).encode('ISO-8859-1'))) + + if b"server" not in hkeys: + self.outheaders.append( + (b"Server", self.server.server_name.encode('ISO-8859-1'))) + + buf = [self.server.protocol.encode('ascii') + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.write(EMPTY.join(buf)) + + +class NoSSLError(Exception): + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_BufferedWriter(io.BufferedWriter): + """Faux file object attached to a socket object.""" + + def write(self, b): + self._checkClosed() + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + + with self._write_lock: + self._write_buf.extend(b) + self._flush_unlocked() + return len(b) + + def _flush_unlocked(self): + self._checkClosed("flush of closed file") + while self._write_buf: + try: + # ssl sockets only except 'bytes', not bytearrays + # so perhaps we should conditionally wrap this for perf? + n = self.raw.write(bytes(self._write_buf)) + except io.BlockingIOError as e: + n = e.characters_written + del self._write_buf[:n] + + +def CP_makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + if 'r' in mode: + return io.BufferedReader(socket.SocketIO(sock, mode), bufsize) + else: + return CP_BufferedWriter(socket.SocketIO(sock, mode), bufsize) + +class HTTPConnection(object): + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. + """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_makefile): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. + req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if errnum == 'timed out' or errnum == 'The read operation timed out': + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See http://www.cherrypy.org/ticket/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_makefile(self.socket._sock, "wb", self.wbufsize) + req.simple_response("400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel socket + # when you call socket.close(). We do so manually here because we + # want this server to send a FIN TCP segment immediately. Note this + # must be called *before* calling socket.close(), because the latter + # drops its reference to the kernel socket. + # Python 3 *probably* fixed this with socket._real_close; hard to tell. +## self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + """An object which equals and does math like the integer '0' but evals True.""" + def __add__(self, other): + return other + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + +class WorkerThread(threading.Thread): + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). + """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ((self.start_time is None) and trueyzero or self.conn.requests_seen), + 'Bytes Read': lambda s: self.bytes_read + ((self.start_time is None) and trueyzero or self.conn.rfile.bytes_read), + 'Bytes Written': lambda s: self.bytes_written + ((self.start_time is None) and trueyzero or self.conn.wfile.bytes_written), + 'Work Time': lambda s: self.work_time + ((self.start_time is None) and trueyzero or time.time() - self.start_time), + 'Read Throughput': lambda s: s['Bytes Read'](s) / (s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / (s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue() + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + for i in range(amount): + if self.max > 0 and len(self._threads) >= self.max: + break + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + self._threads.append(worker) + worker.start() + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. + # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + if amount > 0: + for i in range(min(amount, len(self._threads) - self.min)): + # Put a number of shutdown requests on the queue equal + # to 'amount'. Once each of those is processed by a worker, + # that worker will terminate and be culled from our list + # in self.put. + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See http://www.cherrypy.org/ticket/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + """Base class for SSL driver library adapters. + + Required methods: + + * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` + * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> socket file object`` + """ + + def __init__(self, certificate, private_key, certificate_chain=None): + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def wrap(self, sock): + raise NotImplemented + + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + raise NotImplemented + + +class HTTPServer(object): + """An HTTP server.""" + + _bind_addr = "127.0.0.1" + _interrupt = None + + gateway = None + """A Gateway instance.""" + + minthreads = None + """The minimum number of worker threads to create (default 10).""" + + maxthreads = None + """The maximum number of worker threads to create (default -1 = no limit).""" + + server_name = None + """The name of the server; defaults to socket.gethostname().""" + + protocol = "HTTP/1.1" + """The version string to write in the Status-Line of all HTTP responses. + + For example, "HTTP/1.1" is the default. This also limits the supported + features used in the response.""" + + request_queue_size = 5 + """The 'backlog' arg to socket.listen(); max queued connections (default 5).""" + + shutdown_timeout = 5 + """The total time, in seconds, to wait for worker threads to cleanly exit.""" + + timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + version = "CherryPy/3.2.2" + """A version string for the HTTPServer.""" + + software = None + """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. + + If None, this defaults to ``'%s Server' % self.version``.""" + + ready = False + """An internal flag which marks whether the socket is accepting connections.""" + + max_request_header_size = 0 + """The maximum size, in bytes, for request headers, or 0 for no limit.""" + + max_request_body_size = 0 + """The maximum size, in bytes, for request bodies, or 0 for no limit.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + ConnectionClass = HTTPConnection + """The class to use for handling HTTP connections.""" + + ssl_adapter = None + """An instance of SSLAdapter (or a subclass). + + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum([w['Requests'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Read'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Written'](w) for w + in s['Worker Threads'].values()], 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum([w['Work Time'](w) for w + in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property(_get_bind_addr, _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. + + For UNIX sockets, supply the filename as a string.""") + + def start(self): + """Run the server forever.""" + # We don't have to trap KeyboardInterrupt or SystemExit here, + # because cherrpy.server already does so, calling self.stop() for us. + # If you're using this server with another framework, you should + # trap those exceptions in whatever code block calls start(). + self._interrupt = None + + if self.software is None: + self.software = "%s Server" % self.version + + # Select the appropriate socket + if isinstance(self.bind_addr, basestring): + # AF_UNIX socket + + # So we can reuse the socket... + try: os.unlink(self.bind_addr) + except: pass + + # So everyone can access the socket... + try: os.chmod(self.bind_addr, 511) # 0777 + except: pass + + info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, socket.AI_PASSIVE) + except socket.gaierror: + if ':' in self.bind_addr[0]: + info = [(socket.AF_INET6, socket.SOCK_STREAM, + 0, "", self.bind_addr + (0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, + 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error: + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error(msg) + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + self._start_time = time.time() + while self.ready: + try: + self.tick() + except (KeyboardInterrupt, SystemExit): + raise + except: + self.error_log("Error in HTTPServer.tick", level=logging.ERROR, + traceback=True) + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def error_log(self, msg="", level=20, traceback=False): + # Override this in subclasses as desired + sys.stderr.write(msg + '\n') + sys.stderr.flush() + if traceback: + tblines = format_exc() + sys.stderr.write(tblines) + sys.stderr.flush() + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + prevent_socket_inheritance(self.socket) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay and not isinstance(self.bind_addr, str): + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if self.ssl_adapter is not None: + self.socket = self.ssl_adapter.bind(self.socket) + + # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), + # activate dual-stack. See http://www.cherrypy.org/ticket/871. + if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_makefile + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.write("".join(buf).encode('ISO-8859-1')) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + self.requests.put(conn) + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See http://www.cherrypy.org/ticket/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See http://www.cherrypy.org/ticket/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See http://www.cherrypy.org/ticket/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. + try: + host, port = sock.getsockname()[:2] + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + # Changed to use error code and not message + # See http://www.cherrypy.org/ticket/860. + raise + else: + # Note that we're explicitly NOT using AI_PASSIVE, + # here, because we want an actual IP to touch. + # localhost won't work if we've bound to a public IP, + # but it will if we bound to '0.0.0.0' (INADDR_ANY). + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(1.0) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + if hasattr(sock, "close"): + sock.close() + self.socket = None + + self.requests.stop(self.shutdown_timeout) + + +class Gateway(object): + """A base class to interface HTTPServer with other systems, such as WSGI.""" + + def __init__(self, req): + self.req = req + + def respond(self): + """Process the current request. Must be overridden in a subclass.""" + raise NotImplemented + + +# These may either be wsgiserver.SSLAdapter subclasses or the string names +# of such classes (in which case they will be lazily loaded). +ssl_adapters = { + 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', + } + +def get_ssl_adapter_class(name='builtin'): + """Return an SSL adapter class for the given name.""" + adapter = ssl_adapters[name.lower()] + if isinstance(adapter, basestring): + last_dot = adapter.rfind(".") + attr_name = adapter[last_dot + 1:] + mod_path = adapter[:last_dot] + + try: + mod = sys.modules[mod_path] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(mod_path, globals(), locals(), ['']) + + # Let an AttributeError propagate outward. + try: + adapter = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + return adapter + +# -------------------------------- WSGI Stuff -------------------------------- # + + +class CherryPyWSGIServer(HTTPServer): + """A subclass of HTTPServer which calls a WSGI application.""" + + wsgi_version = (1, 0) + """The version of WSGI to produce.""" + + def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, + max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): + self.requests = ThreadPool(self, min=numthreads or 1, max=max) + self.wsgi_app = wsgi_app + self.gateway = wsgi_gateways[self.wsgi_version] + + self.bind_addr = bind_addr + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.request_queue_size = request_queue_size + + self.timeout = timeout + self.shutdown_timeout = shutdown_timeout + self.clear_stats() + + def _get_numthreads(self): + return self.requests.min + def _set_numthreads(self, value): + self.requests.min = value + numthreads = property(_get_numthreads, _set_numthreads) + + +class WSGIGateway(Gateway): + """A base class to interface HTTPServer with WSGI.""" + + def __init__(self, req): + self.req = req + self.started_response = False + self.env = self.get_environ() + self.remaining_bytes_out = None + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + raise NotImplemented + + def respond(self): + """Process the current request.""" + response = self.req.server.wsgi_app(self.env, self.start_response) + try: + for chunk in response: + # "The start_response callable must not actually transmit + # the response headers. Instead, it must store them for the + # server or gateway to transmit only after the first + # iteration of the application return value that yields + # a NON-EMPTY string, or upon the application's first + # invocation of the write() callable." (PEP 333) + if chunk: + if isinstance(chunk, unicodestr): + chunk = chunk.encode('ISO-8859-1') + self.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + + def start_response(self, status, headers, exc_info = None): + """WSGI callable to begin the HTTP response.""" + # "The application may call start_response more than once, + # if and only if the exc_info argument is provided." + if self.started_response and not exc_info: + raise AssertionError("WSGI start_response called a second " + "time with no exc_info.") + self.started_response = True + + # "if exc_info is provided, and the HTTP headers have already been + # sent, start_response must raise an error, and should raise the + # exc_info tuple." + if self.req.sent_headers: + try: + raise exc_info[0](exc_info[1]).with_traceback(exc_info[2]) + finally: + exc_info = None + + # According to PEP 3333, when using Python 3, the response status + # and headers must be bytes masquerading as unicode; that is, they + # must be of type "str" but are restricted to code points in the + # "latin-1" set. + if not isinstance(status, str): + raise TypeError("WSGI response status is not of type str.") + self.req.status = status.encode('ISO-8859-1') + + for k, v in headers: + if not isinstance(k, str): + raise TypeError("WSGI response header key %r is not of type str." % k) + if not isinstance(v, str): + raise TypeError("WSGI response header value %r is not of type str." % v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.append((k.encode('ISO-8859-1'), v.encode('ISO-8859-1'))) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). + """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response("500 Internal Server Error", + "The requested resource returned more bytes than the " + "declared Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path.decode('ISO-8859-1'), + 'QUERY_STRING': req.qs.decode('ISO-8859-1'), + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method.decode('ISO-8859-1'), + 'REQUEST_URI': req.uri, + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol.decode('ISO-8859-1'), + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme.decode('ISO-8859-1'), + 'wsgi.version': (1, 0), + } + + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.items(): + k = k.decode('ISO-8859-1').upper().replace("-", "_") + env["HTTP_" + k] = v.decode('ISO-8859-1') + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys and values + in both Python 2 and Python 3. + """ + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env_10 = WSGIGateway_10.get_environ(self) + env = env_10.copy() + env['wsgi.version'] = ('u', 0) + + # Request-URI + env.setdefault('wsgi.url_encoding', 'utf-8') + try: + # SCRIPT_NAME is the empty string, who cares what encoding it is? + env["PATH_INFO"] = req.path.decode(env['wsgi.url_encoding']) + env["QUERY_STRING"] = req.qs.decode(env['wsgi.url_encoding']) + except UnicodeDecodeError: + # Fall back to latin 1 so apps can transcode if needed. + env['wsgi.url_encoding'] = 'ISO-8859-1' + env["PATH_INFO"] = env_10["PATH_INFO"] + env["QUERY_STRING"] = env_10["QUERY_STRING"] + + return env + +wsgi_gateways = { + (1, 0): WSGIGateway_10, + ('u', 0): WSGIGateway_u0, +} + +class WSGIPathInfoDispatcher(object): + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = list(apps.items()) + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort() + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". + self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] +