diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py
index 5ebd50d6e0..dd7f3f7275 100644
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@@ -353,16 +353,20 @@ class BooksModel(QAbstractTableModel):
         return self.db.title(row_number)
 
     def cover(self, row_number):
-        id = self.db.id(row_number)
         data = None
-        if self.cover_cache:
-            img = self.cover_cache.cover(id)
-            if img:
-                if img.isNull():
-                    img = self.default_image
-                return img
-        if not data:
-            data = self.db.cover(row_number)
+        try:
+            id = self.db.id(row_number)
+            if self.cover_cache:
+                img = self.cover_cache.cover(id)
+                if img:
+                    if img.isNull():
+                        img = self.default_image
+                    return img
+            if not data:
+                data = self.db.cover(row_number)
+        except IndexError: # Happens if database has not yet been refreshed
+            pass
+
         if not data:
             return self.default_image
         img = QImage()
diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py
index 649a950ea7..37b799832b 100644
--- a/src/calibre/library/server.py
+++ b/src/calibre/library/server.py
@@ -7,34 +7,105 @@ __docformat__ = 'restructuredtext en'
 HTTP server for remote access to the calibre database.
 '''
 
-import sys, logging
+import sys, logging, re, SocketServer, gzip, cStringIO
 
 from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
 
 from calibre.constants import __version__
 from calibre.utils.config import StringConfig, Config
+from calibre import ColoredFormatter
 
-class Server(HTTPServer):
-    pass
+class Server(SocketServer.ThreadingMixIn, HTTPServer):
+    def __init__(self, opts, db):
+        self.db = db
+        HTTPServer.__init__(self, ('', opts.port), DBHandler)
+
+    def serve_forever(self):
+        logging.getLogger('calibre.server').info('calibre-server starting...')
+        HTTPServer.serve_forever(self)
+
+
 class DBHandler(BaseHTTPRequestHandler):
 
     server_version = 'calibre/'+__version__
+    protocol_version = 'HTTP/1.0'
+    cover_request = re.compile(r'/(\d+)/cover', re.IGNORECASE)
+    thumbnail_request = re.compile(r'/(\d+)/thumb', re.IGNORECASE)
+    fmt_request = re.compile(r'/(\d+)/([a-z0-9]+)', re.IGNORECASE)
+
-    def set_db(self, db):
-        self.db = db
-        self.l = logging.getLogger('calibre.server')
-        self.l.info('calibre-server starting...')
+    def __init__(self, request, client_address, server, *args, **kwargs):
+        self.l = logging.getLogger('calibre.server')
+        self.db = server.db
+        BaseHTTPRequestHandler.__init__(self, request, client_address, server, *args, **kwargs)
 
-def server(db, opts):
-    return Server(('', opts.port), DBHandler)
+    def log_message(self, fmt, *args):
+        self.l.info("%s - - [%s] %s\n" %
+                    (self.address_string(),
+                     self.log_date_time_string(),
+                     fmt%args))
+
+    def log_error(self, fmt, *args):
+        self.l.error("%s - - [%s] %s\n" %
+                     (self.address_string(),
+                      self.log_date_time_string(),
+                      fmt%args))
+
+    def do_GET(self):
+        cover = self.cover_request.match(self.path)
+        thumb = self.thumbnail_request.match(self.path)
+        fmt = self.fmt_request.match(self.path)
+        if self.path == '/':
+            self.send_index()
+        elif self.path == '/stanza.atom':
+            self.send_stanza_index()
+        elif self.path == '/library':
+            self.send_library()
+        elif thumb:
+            self.send_cover(int(thumb.group(1)), thumbnail=True)
+        elif cover:
+            self.send_cover(int(cover.group(1)))
+        elif fmt:
+            self.send_format(int(fmt.group(1)), fmt.group(2).upper())
+        elif self.path == '/help':
+            self.send_help()
+        else:
+            self.send_error(400, 'Bad request. Try /help for usage.')
+
+    def compress(self, buf):
+        zbuf = cStringIO.StringIO()
+        zfile = gzip.GzipFile(mode = 'wb', fileobj = zbuf, compresslevel = 9)
+        zfile.write(buf)
+        zfile.close()
+        return zbuf.getvalue()
+
+    def send_help(self):
+        self.send_error(501, 'Not Implemented')
+
+    def send_index(self):
+        self.send_error(501, 'Not Implemented')
+
+    def send_stanza_index(self):
+        self.send_error(501, 'Not Implemented')
+
+    def send_library(self):
+        self.send_error(501, 'Not Implemented')
+
+    def send_cover(self, id, thumbnail=False):
+        self.send_error(501, 'Not Implemented')
+
+    def send_format(self, id, fmt):
+        self.send_error(501, 'Not Implemented')
+
 
 def config(defaults=None):
     desc=_('Settings to control the calibre content server')
     c = Config('server', desc) if defaults is None else StringConfig(defaults, desc)
     c.add_opt('port', ['-p', '--port'], default=8080,
               help=_('The port on which to listen. Default is %default'))
+    c.add_opt('debug', ['--debug'], default=False,
+              help=_('Detailed logging'))
     return c
 
 def option_parser():
@@ -43,13 +114,19 @@ def main(args=sys.argv):
     parser = option_parser()
     opts, args = parser.parse_args(args)
+    l = logging.getLogger('calibre.server')
+    l.setLevel(logging.DEBUG if opts.debug else logging.INFO)
+    l.addHandler(logging.StreamHandler(sys.stdout))
+    l.handlers[-1].setLevel(logging.DEBUG if opts.debug else logging.INFO)
+    formatter = ColoredFormatter('%(levelname)s: %(message)s')
+    l.handlers[-1].setFormatter(formatter)
 
     from calibre.utils.config import prefs
     from calibre.library.database2 import LibraryDatabase2
     db = LibraryDatabase2(prefs['library_path'])
 
     try:
         print 'Starting server...'
-        s = server(db, opts)
+        s = Server(opts, db)
         s.serve_forever()
     except KeyboardInterrupt:
         print 'Server interrupted'
diff --git a/src/calibre/translations/bg.po b/src/calibre/translations/bg.po
index 71b70de16a..4cb947387f 100644
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
@@ -13,7 +13,7 @@ msgstr ""
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n"
+"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 "Generated-By: pygettext.py 1.5\n"
 
diff --git a/src/calibre/translations/ca.po b/src/calibre/translations/ca.po
index 9da0d519a1..17657528cf 100644
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
@@ -17,7 +17,7 @@ msgstr ""
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n"
+"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131
diff --git a/src/calibre/translations/cs.po b/src/calibre/translations/cs.po
index 1a1ba5168f..673585c33d 100644
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
@@ -14,7 +14,7 @@ msgstr ""
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n"
+"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n"
 "X-Generator: Launchpad (build Unknown)\n"
 
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131
diff --git a/src/calibre/translations/de.po b/src/calibre/translations/de.po
index 28042b015d..979ce1e877 100644
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
@@ -14,7 +14,7 @@ 
msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Generated-By: pygettext.py 1.5\n" diff --git a/src/calibre/translations/el.po b/src/calibre/translations/el.po index b937b1acf4..078de1bc48 100644 --- a/src/calibre/translations/el.po +++ b/src/calibre/translations/el.po @@ -14,7 +14,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 diff --git a/src/calibre/translations/es.po b/src/calibre/translations/es.po index d2c61df82e..238c9236cd 100644 --- a/src/calibre/translations/es.po +++ b/src/calibre/translations/es.po @@ -17,7 +17,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 diff --git a/src/calibre/translations/fr.po b/src/calibre/translations/fr.po index 1c4d12cca2..e735a1a6c5 100644 --- a/src/calibre/translations/fr.po +++ b/src/calibre/translations/fr.po @@ -13,7 +13,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Generated-By: pygettext.py 1.5\n" diff --git a/src/calibre/translations/gl.po b/src/calibre/translations/gl.po index 200946b342..69dadee42b 100644 --- a/src/calibre/translations/gl.po +++ b/src/calibre/translations/gl.po @@ -14,7 +14,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 diff --git a/src/calibre/translations/it.po b/src/calibre/translations/it.po index fdbf782cf4..0ecaa27df5 100644 --- a/src/calibre/translations/it.po +++ b/src/calibre/translations/it.po @@ -15,7 +15,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Generated-By: pygettext.py 1.5\n" diff --git a/src/calibre/translations/nb.po b/src/calibre/translations/nb.po index 2c4104c5aa..a05ef88c04 100644 --- a/src/calibre/translations/nb.po +++ b/src/calibre/translations/nb.po @@ -8,13 +8,13 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME \n" "POT-Creation-Date: 2008-10-22 03:42+0000\n" -"PO-Revision-Date: 2008-09-19 20:41+0000\n" -"Last-Translator: Kovid Goyal \n" +"PO-Revision-Date: 2008-10-27 06:38+0000\n" +"Last-Translator: Helene Klungvik \n" "Language-Team: Norwegian Bokmal \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 
2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 @@ -30,13 +30,15 @@ msgstr "Lesebrettet har ikke et lagringskort tilknyttet seg." #: /home/kovid/work/calibre/src/calibre/ebooks/epub/__init__.py:58 msgid "Options to control the conversion to EPUB" -msgstr "" +msgstr "Opsjoner for å kontrollere konverteringen til EPUB" #: /home/kovid/work/calibre/src/calibre/ebooks/epub/__init__.py:69 msgid "" "The output EPUB file. If not specified, it is derived from the input file " "name." msgstr "" +"Utgående EPUB fil. Hvis dette ikke er spesifisert, så blir navnet avledet " +"fra inngående filnavn." #: /home/kovid/work/calibre/src/calibre/ebooks/epub/__init__.py:71 msgid "" diff --git a/src/calibre/translations/nds.po b/src/calibre/translations/nds.po index 14db91384e..0ca6534f6b 100644 --- a/src/calibre/translations/nds.po +++ b/src/calibre/translations/nds.po @@ -14,7 +14,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Generated-By: pygettext.py 1.5\n" diff --git a/src/calibre/translations/nl.po b/src/calibre/translations/nl.po index 89f25c75b4..38b3c6c034 100644 --- a/src/calibre/translations/nl.po +++ b/src/calibre/translations/nl.po @@ -14,7 +14,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 diff --git a/src/calibre/translations/pt.po b/src/calibre/translations/pt.po index 2a9a344927..6f099b5f42 100644 --- a/src/calibre/translations/pt.po +++ b/src/calibre/translations/pt.po @@ -14,7 +14,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 diff --git a/src/calibre/translations/ru.po b/src/calibre/translations/ru.po index e378a25cc3..5baeb85d00 100644 --- a/src/calibre/translations/ru.po +++ b/src/calibre/translations/ru.po @@ -13,7 +13,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Generated-By: pygettext.py 1.5\n" diff --git a/src/calibre/translations/sl.po b/src/calibre/translations/sl.po index 6a3d0c6c8c..81263ed7ed 100644 --- a/src/calibre/translations/sl.po +++ b/src/calibre/translations/sl.po @@ -13,7 +13,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" "Generated-By: pygettext.py 1.5\n" diff --git a/src/calibre/translations/sv.po b/src/calibre/translations/sv.po index d4236169bd..cc6b612d94 100644 --- a/src/calibre/translations/sv.po +++ 
b/src/calibre/translations/sv.po @@ -14,7 +14,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 diff --git a/src/calibre/translations/te.po b/src/calibre/translations/te.po index 9cd2fb7aec..af0fd1084d 100644 --- a/src/calibre/translations/te.po +++ b/src/calibre/translations/te.po @@ -14,7 +14,7 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2008-10-24 22:52+0000\n" +"X-Launchpad-Export-Date: 2008-10-31 06:01+0000\n" "X-Generator: Launchpad (build Unknown)\n" #: /home/kovid/work/calibre/src/calibre/devices/prs505/driver.py:131 diff --git a/src/cherrypy/LICENSE.txt b/src/cherrypy/LICENSE.txt new file mode 100644 index 0000000000..2504532a18 --- /dev/null +++ b/src/cherrypy/LICENSE.txt @@ -0,0 +1,25 @@ +Copyright (c) 2004-2007, CherryPy Team (team@cherrypy.org) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the CherryPy Team nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/cherrypy/__init__.py b/src/cherrypy/__init__.py new file mode 100644 index 0000000000..317e465062 --- /dev/null +++ b/src/cherrypy/__init__.py @@ -0,0 +1,557 @@ +"""CherryPy is a pythonic, object-oriented HTTP framework. + + +CherryPy consists of not one, but four separate API layers. + +The APPLICATION LAYER is the simplest. CherryPy applications are written as +a tree of classes and methods, where each branch in the tree corresponds to +a branch in the URL path. Each method is a 'page handler', which receives +GET and POST params as keyword arguments, and returns or yields the (HTML) +body of the response. The special method name 'index' is used for paths +that end in a slash, and the special method name 'default' is used to +handle multiple paths via a single handler. 
This layer also includes: + + * the 'exposed' attribute (and cherrypy.expose) + * cherrypy.quickstart() + * _cp_config attributes + * cherrypy.tools (including cherrypy.session) + * cherrypy.url() + +The ENVIRONMENT LAYER is used by developers at all levels. It provides +information about the current request and response, plus the application +and server environment, via a (default) set of top-level objects: + + * cherrypy.request + * cherrypy.response + * cherrypy.engine + * cherrypy.server + * cherrypy.tree + * cherrypy.config + * cherrypy.thread_data + * cherrypy.log + * cherrypy.HTTPError, NotFound, and HTTPRedirect + * cherrypy.lib + +The EXTENSION LAYER allows advanced users to construct and share their own +plugins. It consists of: + + * Hook API + * Tool API + * Toolbox API + * Dispatch API + * Config Namespace API + +Finally, there is the CORE LAYER, which uses the core API's to construct +the default components which are available at higher layers. You can think +of the default components as the 'reference implementation' for CherryPy. +Megaframeworks (and advanced users) may replace the default components +with customized or extended components. The core API's are: + + * Application API + * Engine API + * Request API + * Server API + * WSGI API + +These API's are described in the CherryPy specification: +http://www.cherrypy.org/wiki/CherryPySpec +""" + +__version__ = "3.1.0" + +from urlparse import urljoin as _urljoin + + +class _AttributeDocstrings(type): + """Metaclass for declaring docstrings for class attributes.""" + # The full docstring for this type is down in the __init__ method so + # that it doesn't show up in help() for every consumer class. + + def __init__(cls, name, bases, dct): + '''Metaclass for declaring docstrings for class attributes. + + Base Python doesn't provide any syntax for setting docstrings on + 'data attributes' (non-callables). This metaclass allows class + definitions to follow the declaration of a data attribute with + a docstring for that attribute; the attribute docstring will be + popped from the class dict and folded into the class docstring. + + The naming convention for attribute docstrings is: + + "__doc". + For example: + + class Thing(object): + """A thing and its properties.""" + + __metaclass__ = cherrypy._AttributeDocstrings + + height = 50 + height__doc = """The height of the Thing in inches.""" + + In which case, help(Thing) starts like this: + + >>> help(mod.Thing) + Help on class Thing in module pkg.mod: + + class Thing(__builtin__.object) + | A thing and its properties. + | + | height [= 50]: + | The height of the Thing in inches. + | + + The benefits of this approach over hand-edited class docstrings: + 1. Places the docstring nearer to the attribute declaration. + 2. Makes attribute docs more uniform ("name (default): doc"). + 3. Reduces mismatches of attribute _names_ between + the declaration and the documentation. + 4. Reduces mismatches of attribute default _values_ between + the declaration and the documentation. + + The benefits of a metaclass approach over other approaches: + 1. Simpler ("less magic") than interface-based solutions. + 2. __metaclass__ can be specified at the module global level + for classic classes. + + For various formatting reasons, you should write multiline docs + with a leading newline and not a trailing one: + + response__doc = """ + The response object for the current thread. 
In the main thread, + and any threads which are not HTTP requests, this is None.""" + + The type of the attribute is intentionally not included, because + that's not How Python Works. Quack. + ''' + + newdoc = [cls.__doc__ or ""] + + dctnames = dct.keys() + dctnames.sort() + + for name in dctnames: + if name.endswith("__doc"): + # Remove the magic doc attribute. + if hasattr(cls, name): + delattr(cls, name) + + # Make a uniformly-indented docstring from it. + val = '\n'.join([' ' + line.strip() + for line in dct[name].split('\n')]) + + # Get the default value. + attrname = name[:-5] + try: + attrval = getattr(cls, attrname) + except AttributeError: + attrval = "missing" + + # Add the complete attribute docstring to our list. + newdoc.append("%s [= %r]:\n%s" % (attrname, attrval, val)) + + # Add our list of new docstrings to the class docstring. + cls.__doc__ = "\n\n".join(newdoc) + + +from cherrypy._cperror import HTTPError, HTTPRedirect, InternalRedirect +from cherrypy._cperror import NotFound, CherryPyException, TimeoutError + +from cherrypy import _cpdispatch as dispatch + +from cherrypy import _cptools +tools = _cptools.default_toolbox +Tool = _cptools.Tool + +from cherrypy import _cprequest +from cherrypy.lib import http as _http + +from cherrypy import _cptree +tree = _cptree.Tree() +from cherrypy._cptree import Application +from cherrypy import _cpwsgi as wsgi + +from cherrypy import process +try: + from cherrypy.process import win32 + engine = win32.Win32Bus() + engine.console_control_handler = win32.ConsoleCtrlHandler(engine) + del win32 +except ImportError: + engine = process.bus + + +# Timeout monitor +class _TimeoutMonitor(process.plugins.Monitor): + + def __init__(self, bus): + self.servings = [] + process.plugins.Monitor.__init__(self, bus, self.run) + + def acquire(self): + self.servings.append((serving.request, serving.response)) + + def release(self): + try: + self.servings.remove((serving.request, serving.response)) + except ValueError: + pass + + def run(self): + """Check timeout on all responses. (Internal)""" + for req, resp in self.servings: + resp.check_timeout() +engine.timeout_monitor = _TimeoutMonitor(engine) +engine.timeout_monitor.subscribe() + +engine.autoreload = process.plugins.Autoreloader(engine) +engine.autoreload.subscribe() + +engine.thread_manager = process.plugins.ThreadManager(engine) +engine.thread_manager.subscribe() + +engine.signal_handler = process.plugins.SignalHandler(engine) + + +from cherrypy import _cpserver +server = _cpserver.Server() +server.subscribe() + + +def quickstart(root=None, script_name="", config=None): + """Mount the given root, start the builtin server (and engine), then block. + + root: an instance of a "controller class" (a collection of page handler + methods) which represents the root of the application. + script_name: a string containing the "mount point" of the application. + This should start with a slash, and be the path portion of the URL + at which to mount the given root. For example, if root.index() will + handle requests to "http://www.example.com:8080/dept/app1/", then + the script_name argument would be "/dept/app1". + + It MUST NOT end in a slash. If the script_name refers to the root + of the URI, it MUST be an empty string (not "/"). + config: a file or dict containing application config. If this contains + a [global] section, those entries will be used in the global + (site-wide) config. 
+ """ + if config: + _global_conf_alias.update(config) + + if root is not None: + tree.mount(root, script_name, config) + + if hasattr(engine, "signal_handler"): + engine.signal_handler.subscribe() + if hasattr(engine, "console_control_handler"): + engine.console_control_handler.subscribe() + + engine.start() + engine.block() + + +try: + from threading import local as _local +except ImportError: + from cherrypy._cpthreadinglocal import local as _local + +class _Serving(_local): + """An interface for registering request and response objects. + + Rather than have a separate "thread local" object for the request and + the response, this class works as a single threadlocal container for + both objects (and any others which developers wish to define). In this + way, we can easily dump those objects when we stop/start a new HTTP + conversation, yet still refer to them as module-level globals in a + thread-safe way. + """ + + __metaclass__ = _AttributeDocstrings + + request = _cprequest.Request(_http.Host("127.0.0.1", 80), + _http.Host("127.0.0.1", 1111)) + request__doc = """ + The request object for the current thread. In the main thread, + and any threads which are not receiving HTTP requests, this is None.""" + + response = _cprequest.Response() + response__doc = """ + The response object for the current thread. In the main thread, + and any threads which are not receiving HTTP requests, this is None.""" + + def load(self, request, response): + self.request = request + self.response = response + + def clear(self): + """Remove all attributes of self.""" + self.__dict__.clear() + +serving = _Serving() + + +class _ThreadLocalProxy(object): + + __slots__ = ['__attrname__', '__dict__'] + + def __init__(self, attrname): + self.__attrname__ = attrname + + def __getattr__(self, name): + child = getattr(serving, self.__attrname__) + return getattr(child, name) + + def __setattr__(self, name, value): + if name in ("__attrname__", ): + object.__setattr__(self, name, value) + else: + child = getattr(serving, self.__attrname__) + setattr(child, name, value) + + def __delattr__(self, name): + child = getattr(serving, self.__attrname__) + delattr(child, name) + + def _get_dict(self): + child = getattr(serving, self.__attrname__) + d = child.__class__.__dict__.copy() + d.update(child.__dict__) + return d + __dict__ = property(_get_dict) + + def __getitem__(self, key): + child = getattr(serving, self.__attrname__) + return child[key] + + def __setitem__(self, key, value): + child = getattr(serving, self.__attrname__) + child[key] = value + + def __delitem__(self, key): + child = getattr(serving, self.__attrname__) + del child[key] + + def __contains__(self, key): + child = getattr(serving, self.__attrname__) + return key in child + + def __len__(self): + child = getattr(serving, self.__attrname__) + return len(child) + + +# Create request and response object (the same objects will be used +# throughout the entire life of the webserver, but will redirect +# to the "serving" object) +request = _ThreadLocalProxy('request') +response = _ThreadLocalProxy('response') + +# Create thread_data object as a thread-specific all-purpose storage +class _ThreadData(_local): + """A container for thread-specific data.""" +thread_data = _ThreadData() + + +# Monkeypatch pydoc to allow help() to go through the threadlocal proxy. +# Jan 2007: no Googleable examples of anyone else replacing pydoc.resolve. 
+# The only other way would be to change what is returned from type(request) +# and that's not possible in pure Python (you'd have to fake ob_type). +def _cherrypy_pydoc_resolve(thing, forceload=0): + """Given an object or a path to an object, get the object and its name.""" + if isinstance(thing, _ThreadLocalProxy): + thing = getattr(serving, thing.__attrname__) + return _pydoc._builtin_resolve(thing, forceload) + +try: + import pydoc as _pydoc + _pydoc._builtin_resolve = _pydoc.resolve + _pydoc.resolve = _cherrypy_pydoc_resolve +except ImportError: + pass + + +from cherrypy import _cplogging + +class _GlobalLogManager(_cplogging.LogManager): + + def __call__(self, *args, **kwargs): + try: + log = request.app.log + except AttributeError: + log = self + return log.error(*args, **kwargs) + + def access(self): + try: + return request.app.log.access() + except AttributeError: + return _cplogging.LogManager.access(self) + + +log = _GlobalLogManager() +# Set a default screen handler on the global log. +log.screen = True +log.error_file = '' +# Using an access file makes CP about 10% slower. Leave off by default. +log.access_file = '' + +def _buslog(msg, level): + log.error(msg, 'ENGINE', severity=level) +engine.subscribe('log', _buslog) + +# Helper functions for CP apps # + + +def expose(func=None, alias=None): + """Expose the function, optionally providing an alias or set of aliases.""" + def expose_(func): + func.exposed = True + if alias is not None: + if isinstance(alias, basestring): + parents[alias.replace(".", "_")] = func + else: + for a in alias: + parents[a.replace(".", "_")] = func + return func + + import sys, types + if isinstance(func, (types.FunctionType, types.MethodType)): + if alias is None: + # @expose + func.exposed = True + return func + else: + # func = expose(func, alias) + parents = sys._getframe(1).f_locals + return expose_(func) + elif func is None: + if alias is None: + # @expose() + parents = sys._getframe(1).f_locals + return expose_ + else: + # @expose(alias="alias") or + # @expose(alias=["alias1", "alias2"]) + parents = sys._getframe(1).f_locals + return expose_ + else: + # @expose("alias") or + # @expose(["alias1", "alias2"]) + parents = sys._getframe(1).f_locals + alias = func + return expose_ + + +def url(path="", qs="", script_name=None, base=None, relative=None): + """Create an absolute URL for the given path. + + If 'path' starts with a slash ('/'), this will return + (base + script_name + path + qs). + If it does not start with a slash, this returns + (base + script_name [+ request.path_info] + path + qs). + + If script_name is None, cherrypy.request will be used + to find a script_name, if available. + + If base is None, cherrypy.request.base will be used (if available). + Note that you can use cherrypy.tools.proxy to change this. + + Finally, note that this function can be used to obtain an absolute URL + for the current request path (minus the querystring) by passing no args. + If you call url(qs=cherrypy.request.query_string), you should get the + original browser URL (assuming no internal redirections). + + If relative is None or not provided, request.app.relative_urls will + be used (if available, else False). If False, the output will be an + absolute URL (including the scheme, host, vhost, and script_name). + If True, the output will instead be a URL that is relative to the + current request path, perhaps including '..' atoms. 
If relative is + the string 'server', the output will instead be a URL that is + relative to the server root; i.e., it will start with a slash. + """ + if qs: + qs = '?' + qs + + if request.app: + if not path.startswith("/"): + # Append/remove trailing slash from path_info as needed + # (this is to support mistyped URL's without redirecting; + # if you want to redirect, use tools.trailing_slash). + pi = request.path_info + if request.is_index is True: + if not pi.endswith('/'): + pi = pi + '/' + elif request.is_index is False: + if pi.endswith('/') and pi != '/': + pi = pi[:-1] + + if path == "": + path = pi + else: + path = _urljoin(pi, path) + + if script_name is None: + script_name = request.script_name + if base is None: + base = request.base + + newurl = base + script_name + path + qs + else: + # No request.app (we're being called outside a request). + # We'll have to guess the base from server.* attributes. + # This will produce very different results from the above + # if you're using vhosts or tools.proxy. + if base is None: + base = server.base() + + path = (script_name or "") + path + newurl = base + path + qs + + if './' in newurl: + # Normalize the URL by removing ./ and ../ + atoms = [] + for atom in newurl.split('/'): + if atom == '.': + pass + elif atom == '..': + atoms.pop() + else: + atoms.append(atom) + newurl = '/'.join(atoms) + + # At this point, we should have a fully-qualified absolute URL. + + if relative is None: + relative = getattr(request.app, "relative_urls", False) + + # See http://www.ietf.org/rfc/rfc2396.txt + if relative == 'server': + # "A relative reference beginning with a single slash character is + # termed an absolute-path reference, as defined by ..." + # This is also sometimes called "server-relative". + newurl = '/' + '/'.join(newurl.split('/', 3)[3:]) + elif relative: + # "A relative reference that does not begin with a scheme name + # or a slash character is termed a relative-path reference." + old = url().split('/')[:-1] + new = newurl.split('/') + while old and new: + a, b = old[0], new[0] + if a != b: + break + old.pop(0) + new.pop(0) + new = (['..'] * len(old)) + new + newurl = '/'.join(new) + + return newurl + + +# import _cpconfig last so it can reference other top-level objects +from cherrypy import _cpconfig +# Use _global_conf_alias so quickstart can use 'config' as an arg +# without shadowing cherrypy.config. 
+config = _global_conf_alias = _cpconfig.Config() + +from cherrypy import _cpchecker +checker = _cpchecker.Checker() +engine.subscribe('start', checker) diff --git a/src/cherrypy/_cpcgifs.py b/src/cherrypy/_cpcgifs.py new file mode 100644 index 0000000000..3eb5dd85df --- /dev/null +++ b/src/cherrypy/_cpcgifs.py @@ -0,0 +1,79 @@ +import cgi +import cherrypy + + +class FieldStorage(cgi.FieldStorage): + def __init__(self, *args, **kwds): + try: + cgi.FieldStorage.__init__(self, *args, **kwds) + except ValueError, ex: + if str(ex) == 'Maximum content length exceeded': + raise cherrypy.HTTPError(status=413) + else: + raise ex + + def read_lines_to_eof(self): + """Internal: read lines until EOF.""" + while 1: + line = self.fp.readline(1<<16) + if not line: + self.done = -1 + break + self.__write(line) + + def read_lines_to_outerboundary(self): + """Internal: read lines until outerboundary.""" + next = "--" + self.outerboundary + last = next + "--" + delim = "" + last_line_lfend = True + while 1: + line = self.fp.readline(1<<16) + if not line: + self.done = -1 + break + if line[:2] == "--" and last_line_lfend: + strippedline = line.strip() + if strippedline == next: + break + if strippedline == last: + self.done = 1 + break + odelim = delim + if line[-2:] == "\r\n": + delim = "\r\n" + line = line[:-2] + last_line_lfend = True + elif line[-1] == "\n": + delim = "\n" + line = line[:-1] + last_line_lfend = True + else: + delim = "" + last_line_lfend = False + self.__write(odelim + line) + + def skip_lines(self): + """Internal: skip lines until outer boundary if defined.""" + if not self.outerboundary or self.done: + return + next = "--" + self.outerboundary + last = next + "--" + last_line_lfend = True + while 1: + line = self.fp.readline(1<<16) + if not line: + self.done = -1 + break + if line[:2] == "--" and last_line_lfend: + strippedline = line.strip() + if strippedline == next: + break + if strippedline == last: + self.done = 1 + break + if line.endswith('\n'): + last_line_lfend = True + else: + last_line_lfend = False + diff --git a/src/cherrypy/_cpchecker.py b/src/cherrypy/_cpchecker.py new file mode 100644 index 0000000000..0d98e84918 --- /dev/null +++ b/src/cherrypy/_cpchecker.py @@ -0,0 +1,269 @@ +import os +import warnings + +import cherrypy + + +class Checker(object): + """A checker for CherryPy sites and their mounted applications. + + on: set this to False to turn off the checker completely. + + When this object is called at engine startup, it executes each + of its own methods whose names start with "check_". If you wish + to disable selected checks, simply add a line in your global + config which sets the appropriate method to False: + + [global] + checker.check_skipped_app_config = False + + You may also dynamically add or replace check_* methods in this way. + """ + + on = True + + def __init__(self): + self._populate_known_types() + + def __call__(self): + """Run all check_* methods.""" + if self.on: + oldformatwarning = warnings.formatwarning + warnings.formatwarning = self.formatwarning + try: + for name in dir(self): + if name.startswith("check_"): + method = getattr(self, name) + if method and callable(method): + method() + finally: + warnings.formatwarning = oldformatwarning + + def formatwarning(self, message, category, filename, lineno): + """Function to format a warning.""" + return "CherryPy Checker:\n%s\n\n" % message + + # This value should be set inside _cpconfig. 
+ global_config_contained_paths = False + + def check_skipped_app_config(self): + for sn, app in cherrypy.tree.apps.iteritems(): + if not isinstance(app, cherrypy.Application): + continue + if not app.config: + msg = "The Application mounted at %r has an empty config." % sn + if self.global_config_contained_paths: + msg += (" It looks like the config you passed to " + "cherrypy.config.update() contains application-" + "specific sections. You must explicitly pass " + "application config via " + "cherrypy.tree.mount(..., config=app_config)") + warnings.warn(msg) + return + + def check_static_paths(self): + # Use the dummy Request object in the main thread. + request = cherrypy.request + for sn, app in cherrypy.tree.apps.iteritems(): + if not isinstance(app, cherrypy.Application): + continue + request.app = app + for section in app.config: + # get_resource will populate request.config + request.get_resource(section + "/dummy.html") + conf = request.config.get + + if conf("tools.staticdir.on", False): + msg = "" + root = conf("tools.staticdir.root") + dir = conf("tools.staticdir.dir") + if dir is None: + msg = "tools.staticdir.dir is not set." + else: + fulldir = "" + if os.path.isabs(dir): + fulldir = dir + if root: + msg = ("dir is an absolute path, even " + "though a root is provided.") + testdir = os.path.join(root, dir[1:]) + if os.path.exists(testdir): + msg += ("\nIf you meant to serve the " + "filesystem folder at %r, remove " + "the leading slash from dir." % testdir) + else: + if not root: + msg = "dir is a relative path and no root provided." + else: + fulldir = os.path.join(root, dir) + if not os.path.isabs(fulldir): + msg = "%r is not an absolute path." % fulldir + + if fulldir and not os.path.exists(fulldir): + if msg: + msg += "\n" + msg += ("%r (root + dir) is not an existing " + "filesystem path." % fulldir) + + if msg: + warnings.warn("%s\nsection: [%s]\nroot: %r\ndir: %r" + % (msg, section, root, dir)) + + + # -------------------------- Compatibility -------------------------- # + + obsolete = { + 'server.default_content_type': 'tools.response_headers.headers', + 'log_access_file': 'log.access_file', + 'log_config_options': None, + 'log_file': 'log.error_file', + 'log_file_not_found': None, + 'log_request_headers': 'tools.log_headers.on', + 'log_to_screen': 'log.screen', + 'show_tracebacks': 'request.show_tracebacks', + 'throw_errors': 'request.throw_errors', + 'profiler.on': ('cherrypy.tree.mount(profiler.make_app(' + 'cherrypy.Application(Root())))'), + } + + deprecated = {} + + def _compat(self, config): + """Process config and warn on each obsolete or deprecated entry.""" + for section, conf in config.iteritems(): + if isinstance(conf, dict): + for k, v in conf.iteritems(): + if k in self.obsolete: + warnings.warn("%r is obsolete. Use %r instead.\n" + "section: [%s]" % + (k, self.obsolete[k], section)) + elif k in self.deprecated: + warnings.warn("%r is deprecated. Use %r instead.\n" + "section: [%s]" % + (k, self.deprecated[k], section)) + else: + if section in self.obsolete: + warnings.warn("%r is obsolete. Use %r instead." + % (section, self.obsolete[section])) + elif section in self.deprecated: + warnings.warn("%r is deprecated. Use %r instead." 
+ % (section, self.deprecated[section])) + + def check_compatibility(self): + """Process config and warn on each obsolete or deprecated entry.""" + self._compat(cherrypy.config) + for sn, app in cherrypy.tree.apps.iteritems(): + if not isinstance(app, cherrypy.Application): + continue + self._compat(app.config) + + + # ------------------------ Known Namespaces ------------------------ # + + extra_config_namespaces = [] + + def _known_ns(self, app): + ns = ["wsgi"] + ns.extend(app.toolboxes.keys()) + ns.extend(app.namespaces.keys()) + ns.extend(app.request_class.namespaces.keys()) + ns.extend(cherrypy.config.namespaces.keys()) + ns += self.extra_config_namespaces + + for section, conf in app.config.iteritems(): + is_path_section = section.startswith("/") + if is_path_section and isinstance(conf, dict): + for k, v in conf.iteritems(): + atoms = k.split(".") + if len(atoms) > 1: + if atoms[0] not in ns: + # Spit out a special warning if a known + # namespace is preceded by "cherrypy." + if (atoms[0] == "cherrypy" and atoms[1] in ns): + msg = ("The config entry %r is invalid; " + "try %r instead.\nsection: [%s]" + % (k, ".".join(atoms[1:]), section)) + else: + msg = ("The config entry %r is invalid, because " + "the %r config namespace is unknown.\n" + "section: [%s]" % (k, atoms[0], section)) + warnings.warn(msg) + elif atoms[0] == "tools": + if atoms[1] not in dir(cherrypy.tools): + msg = ("The config entry %r may be invalid, " + "because the %r tool was not found.\n" + "section: [%s]" % (k, atoms[1], section)) + warnings.warn(msg) + + def check_config_namespaces(self): + """Process config and warn on each unknown config namespace.""" + for sn, app in cherrypy.tree.apps.iteritems(): + if not isinstance(app, cherrypy.Application): + continue + self._known_ns(app) + + + + + # -------------------------- Config Types -------------------------- # + + known_config_types = {} + + def _populate_known_types(self): + import __builtin__ + builtins = [x for x in vars(__builtin__).values() + if type(x) is type(str)] + + def traverse(obj, namespace): + for name in dir(obj): + vtype = type(getattr(obj, name, None)) + if vtype in builtins: + self.known_config_types[namespace + "." 
+ name] = vtype + + traverse(cherrypy.request, "request") + traverse(cherrypy.response, "response") + traverse(cherrypy.server, "server") + traverse(cherrypy.engine, "engine") + traverse(cherrypy.log, "log") + + def _known_types(self, config): + msg = ("The config entry %r in section %r is of type %r, " + "which does not match the expected type %r.") + + for section, conf in config.iteritems(): + if isinstance(conf, dict): + for k, v in conf.iteritems(): + if v is not None: + expected_type = self.known_config_types.get(k, None) + vtype = type(v) + if expected_type and vtype != expected_type: + warnings.warn(msg % (k, section, vtype.__name__, + expected_type.__name__)) + else: + k, v = section, conf + if v is not None: + expected_type = self.known_config_types.get(k, None) + vtype = type(v) + if expected_type and vtype != expected_type: + warnings.warn(msg % (k, section, vtype.__name__, + expected_type.__name__)) + + def check_config_types(self): + """Assert that config values are of the same type as default values.""" + self._known_types(cherrypy.config) + for sn, app in cherrypy.tree.apps.iteritems(): + if not isinstance(app, cherrypy.Application): + continue + self._known_types(app.config) + + + # -------------------- Specific config warnings -------------------- # + + def check_localhost(self): + """Warn if any socket_host is 'localhost'. See #711.""" + for k, v in cherrypy.config.iteritems(): + if k == 'server.socket_host' and v == 'localhost': + warnings.warn("The use of 'localhost' as a socket host can " + "cause problems on newer systems, since 'localhost' can " + "map to either an IPv4 or an IPv6 address. You should " + "use '127.0.0.1' or '[::1]' instead.") diff --git a/src/cherrypy/_cpconfig.py b/src/cherrypy/_cpconfig.py new file mode 100644 index 0000000000..815bb1936e --- /dev/null +++ b/src/cherrypy/_cpconfig.py @@ -0,0 +1,372 @@ +"""Configuration system for CherryPy. + +Configuration in CherryPy is implemented via dictionaries. Keys are strings +which name the mapped value, which may be of any type. + + +Architecture +------------ + +CherryPy Requests are part of an Application, which runs in a global context, +and configuration data may apply to any of those three scopes: + + Global: configuration entries which apply everywhere are stored in + cherrypy.config. + + Application: entries which apply to each mounted application are stored + on the Application object itself, as 'app.config'. This is a two-level + dict where each key is a path, or "relative URL" (for example, "/" or + "/path/to/my/page"), and each value is a config dict. Usually, this + data is provided in the call to tree.mount(root(), config=conf), + although you may also use app.merge(conf). + + Request: each Request object possesses a single 'Request.config' dict. + Early in the request process, this dict is populated by merging global + config entries, Application entries (whose path equals or is a parent + of Request.path_info), and any config acquired while looking up the + page handler (see next). + + +Declaration +----------- + +Configuration data may be supplied as a Python dictionary, as a filename, +or as an open file object. When you supply a filename or file, CherryPy +uses Python's builtin ConfigParser; you declare Application config by +writing each path as a section header: + + [/path/to/my/page] + request.stream = True + +To declare global configuration entries, place them in a [global] section. 
+ +You may also declare config entries directly on the classes and methods +(page handlers) that make up your CherryPy application via the '_cp_config' +attribute. For example: + + class Demo: + _cp_config = {'tools.gzip.on': True} + + def index(self): + return "Hello world" + index.exposed = True + index._cp_config = {'request.show_tracebacks': False} + +Note, however, that this behavior is only guaranteed for the default +dispatcher. Other dispatchers may have different restrictions on where +you can attach _cp_config attributes. + + +Namespaces +---------- + +Configuration keys are separated into namespaces by the first "." in the key. +Current namespaces: + + engine: Controls the 'application engine', including autoreload. + These can only be declared in the global config. + tree: Grafts cherrypy.Application objects onto cherrypy.tree. + These can only be declared in the global config. + hooks: Declares additional request-processing functions. + log: Configures the logging for each application. + These can only be declared in the global or / config. + request: Adds attributes to each Request. + response: Adds attributes to each Response. + server: Controls the default HTTP server via cherrypy.server. + These can only be declared in the global config. + tools: Runs and configures additional request-processing packages. + wsgi: Adds WSGI middleware to an Application's "pipeline". + These can only be declared in the app's root config ("/"). + checker: Controls the 'checker', which looks for common errors in + app state (including config) when the engine starts. + Global config only. + +The only key that does not exist in a namespace is the "environment" entry. +This special entry 'imports' other config entries from a template stored in +cherrypy._cpconfig.environments[environment]. It only applies to the global +config, and only when you use cherrypy.config.update. + +You can define your own namespaces to be called at the Global, Application, +or Request level, by adding a named handler to cherrypy.config.namespaces, +app.namespaces, or app.request_class.namespaces. The name can +be any string, and the handler must be either a callable or a (Python 2.5 +style) context manager. +""" + +import ConfigParser +try: + set +except NameError: + from sets import Set as set +import sys + +import cherrypy + + +environments = { + "staging": { + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + }, + "production": { + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'log.screen': False, + }, + "embedded": { + # For use with CherryPy embedded in another deployment stack. + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'log.screen': False, + 'engine.SIGHUP': None, + 'engine.SIGTERM': None, + }, + "test_suite": { + 'engine.autoreload_on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': True, + 'log.screen': False, + }, + } + +def as_dict(config): + """Return a dict from 'config' whether it is a dict, file, or filename.""" + if isinstance(config, basestring): + config = _Parser().dict_from_file(config) + elif hasattr(config, 'read'): + config = _Parser().dict_from_file(config) + return config + +def merge(base, other): + """Merge one app config (from a dict, file, or filename) into another. 
+ + If the given config is a filename, it will be appended to + the list of files to monitor for "autoreload" changes. + """ + if isinstance(other, basestring): + cherrypy.engine.autoreload.files.add(other) + + # Load other into base + for section, value_map in as_dict(other).iteritems(): + base.setdefault(section, {}).update(value_map) + + +class NamespaceSet(dict): + """A dict of config namespace names and handlers. + + Each config entry should begin with a namespace name; the corresponding + namespace handler will be called once for each config entry in that + namespace, and will be passed two arguments: the config key (with the + namespace removed) and the config value. + + Namespace handlers may be any Python callable; they may also be + Python 2.5-style 'context managers', in which case their __enter__ + method should return a callable to be used as the handler. + See cherrypy.tools (the Toolbox class) for an example. + """ + + def __call__(self, config): + """Iterate through config and pass it to each namespace handler. + + 'config' should be a flat dict, where keys use dots to separate + namespaces, and values are arbitrary. + + The first name in each config key is used to look up the corresponding + namespace handler. For example, a config entry of {'tools.gzip.on': v} + will call the 'tools' namespace handler with the args: ('gzip.on', v) + """ + # Separate the given config into namespaces + ns_confs = {} + for k in config: + if "." in k: + ns, name = k.split(".", 1) + bucket = ns_confs.setdefault(ns, {}) + bucket[name] = config[k] + + # I chose __enter__ and __exit__ so someday this could be + # rewritten using Python 2.5's 'with' statement: + # for ns, handler in self.iteritems(): + # with handler as callable: + # for k, v in ns_confs.get(ns, {}).iteritems(): + # callable(k, v) + for ns, handler in self.iteritems(): + exit = getattr(handler, "__exit__", None) + if exit: + callable = handler.__enter__() + no_exc = True + try: + try: + for k, v in ns_confs.get(ns, {}).iteritems(): + callable(k, v) + except: + # The exceptional case is handled here + no_exc = False + if exit is None: + raise + if not exit(*sys.exc_info()): + raise + # The exception is swallowed if exit() returns true + finally: + # The normal and non-local-goto cases are handled here + if no_exc and exit: + exit(None, None, None) + else: + for k, v in ns_confs.get(ns, {}).iteritems(): + handler(k, v) + + def __repr__(self): + return "%s.%s(%s)" % (self.__module__, self.__class__.__name__, + dict.__repr__(self)) + + def __copy__(self): + newobj = self.__class__() + newobj.update(self) + return newobj + copy = __copy__ + + +class Config(dict): + """The 'global' configuration data for the entire CherryPy process.""" + + defaults = { + 'tools.log_tracebacks.on': True, + 'tools.log_headers.on': True, + 'tools.trailing_slash.on': True, + } + + namespaces = NamespaceSet( + **{"server": lambda k, v: setattr(cherrypy.server, k, v), + "log": lambda k, v: setattr(cherrypy.log, k, v), + "checker": lambda k, v: setattr(cherrypy.checker, k, v), + }) + + def __init__(self): + self.reset() + + def reset(self): + """Reset self to default values.""" + self.clear() + dict.update(self, self.defaults) + + def update(self, config): + """Update self from a dict, file or filename.""" + if isinstance(config, basestring): + # Filename + cherrypy.engine.autoreload.files.add(config) + config = _Parser().dict_from_file(config) + elif hasattr(config, 'read'): + # Open file object + config = _Parser().dict_from_file(config) + else: + config = 
config.copy() + + if isinstance(config.get("global", None), dict): + if len(config) > 1: + cherrypy.checker.global_config_contained_paths = True + config = config["global"] + + which_env = config.get('environment') + if which_env: + env = environments[which_env] + for k in env: + if k not in config: + config[k] = env[k] + + if 'tools.staticdir.dir' in config: + config['tools.staticdir.section'] = "global" + + dict.update(self, config) + self.namespaces(config) + + def __setitem__(self, k, v): + dict.__setitem__(self, k, v) + self.namespaces({k: v}) + + +def _engine_namespace_handler(k, v): + """Backward compatibility handler for the "engine" namespace.""" + engine = cherrypy.engine + if k == 'autoreload_on': + if v: + engine.autoreload.subscribe() + else: + engine.autoreload.unsubscribe() + elif k == 'autoreload_frequency': + engine.autoreload.frequency = v + elif k == 'autoreload_match': + engine.autoreload.match = v + elif k == 'reload_files': + engine.autoreload.files = v + elif k == 'deadlock_poll_freq': + engine.timeout_monitor.frequency = v + elif k == 'SIGHUP': + engine.listeners['SIGHUP'] = set([v]) + elif k == 'SIGTERM': + engine.listeners['SIGTERM'] = set([v]) +Config.namespaces["engine"] = _engine_namespace_handler + + +def _tree_namespace_handler(k, v): + """Namespace handler for the 'tree' config namespace.""" + cherrypy.tree.graft(v, v.script_name) + cherrypy.engine.log("Mounted: %s on %s" % (v, v.script_name or "/")) +Config.namespaces["tree"] = _tree_namespace_handler + + +class _Parser(ConfigParser.ConfigParser): + """Sub-class of ConfigParser that keeps the case of options and that raises + an exception if the file cannot be read. + """ + + def optionxform(self, optionstr): + return optionstr + + def read(self, filenames): + if isinstance(filenames, basestring): + filenames = [filenames] + for filename in filenames: + # try: + # fp = open(filename) + # except IOError: + # continue + fp = open(filename) + try: + self._read(fp, filename) + finally: + fp.close() + + def as_dict(self, raw=False, vars=None): + """Convert an INI file to a dictionary""" + # Load INI file into a dict + from cherrypy.lib import unrepr + result = {} + for section in self.sections(): + if section not in result: + result[section] = {} + for option in self.options(section): + value = self.get(section, option, raw, vars) + try: + value = unrepr(value) + except Exception, x: + msg = ("Config error in section: %r, option: %r, " + "value: %r. Config values must be valid Python." % + (section, option, value)) + raise ValueError(msg, x.__class__.__name__, x.args) + result[section][option] = value + return result + + def dict_from_file(self, file): + if hasattr(file, 'read'): + self.readfp(file) + else: + self.read(file) + return self.as_dict() + +del ConfigParser diff --git a/src/cherrypy/_cpdispatch.py b/src/cherrypy/_cpdispatch.py new file mode 100644 index 0000000000..4e7104bf72 --- /dev/null +++ b/src/cherrypy/_cpdispatch.py @@ -0,0 +1,396 @@ +"""CherryPy dispatchers. + +A 'dispatcher' is the object which looks up the 'page handler' callable +and collects config for the current request based on the path_info, other +request attributes, and the application architecture. The core calls the +dispatcher as early as possible, passing it a 'path_info' argument. + +The default dispatcher discovers the page handler by matching path_info +to a hierarchical arrangement of objects, starting at request.app.root. 
+""" + +import cherrypy + + +class PageHandler(object): + """Callable which sets response.body.""" + + def __init__(self, callable, *args, **kwargs): + self.callable = callable + self.args = args + self.kwargs = kwargs + + def __call__(self): + return self.callable(*self.args, **self.kwargs) + + +class LateParamPageHandler(PageHandler): + """When passing cherrypy.request.params to the page handler, we do not + want to capture that dict too early; we want to give tools like the + decoding tool a chance to modify the params dict in-between the lookup + of the handler and the actual calling of the handler. This subclass + takes that into account, and allows request.params to be 'bound late' + (it's more complicated than that, but that's the effect). + """ + + def _get_kwargs(self): + kwargs = cherrypy.request.params.copy() + if self._kwargs: + kwargs.update(self._kwargs) + return kwargs + + def _set_kwargs(self, kwargs): + self._kwargs = kwargs + + kwargs = property(_get_kwargs, _set_kwargs, + doc='page handler kwargs (with ' + 'cherrypy.request.params copied in)') + + +class Dispatcher(object): + """CherryPy Dispatcher which walks a tree of objects to find a handler. + + The tree is rooted at cherrypy.request.app.root, and each hierarchical + component in the path_info argument is matched to a corresponding nested + attribute of the root object. Matching handlers must have an 'exposed' + attribute which evaluates to True. The special method name "index" + matches a URI which ends in a slash ("/"). The special method name + "default" may match a portion of the path_info (but only when no longer + substring of the path_info matches some other object). + + This is the default, built-in dispatcher for CherryPy. + """ + + def __call__(self, path_info): + """Set handler and config for the current request.""" + request = cherrypy.request + func, vpath = self.find_handler(path_info) + + if func: + # Decode any leftover %2F in the virtual_path atoms. + vpath = [x.replace("%2F", "/") for x in vpath] + request.handler = LateParamPageHandler(func, *vpath) + else: + request.handler = cherrypy.NotFound() + + def find_handler(self, path): + """Return the appropriate page handler, plus any virtual path. + + This will return two objects. The first will be a callable, + which can be used to generate page output. Any parameters from + the query string or request body will be sent to that callable + as keyword arguments. + + The callable is found by traversing the application's tree, + starting from cherrypy.request.app.root, and matching path + components to successive objects in the tree. For example, the + URL "/path/to/handler" might return root.path.to.handler. + + The second object returned will be a list of names which are + 'virtual path' components: parts of the URL which are dynamic, + and were not used when looking up the handler. + These virtual path components are passed to the handler as + positional arguments. + """ + request = cherrypy.request + app = request.app + root = app.root + + # Get config for the root object/path. + curpath = "" + nodeconf = {} + if hasattr(root, "_cp_config"): + nodeconf.update(root._cp_config) + if "/" in app.config: + nodeconf.update(app.config["/"]) + object_trail = [['root', root, nodeconf, curpath]] + + node = root + names = [x for x in path.strip('/').split('/') if x] + ['index'] + for name in names: + # map to legal Python identifiers (replace '.' 
with '_') + objname = name.replace('.', '_') + + nodeconf = {} + node = getattr(node, objname, None) + if node is not None: + # Get _cp_config attached to this node. + if hasattr(node, "_cp_config"): + nodeconf.update(node._cp_config) + + # Mix in values from app.config for this path. + curpath = "/".join((curpath, name)) + if curpath in app.config: + nodeconf.update(app.config[curpath]) + + object_trail.append([name, node, nodeconf, curpath]) + + def set_conf(): + """Collapse all object_trail config into cherrypy.request.config.""" + base = cherrypy.config.copy() + # Note that we merge the config from each node + # even if that node was None. + for name, obj, conf, curpath in object_trail: + base.update(conf) + if 'tools.staticdir.dir' in conf: + base['tools.staticdir.section'] = curpath + return base + + # Try successive objects (reverse order) + num_candidates = len(object_trail) - 1 + for i in xrange(num_candidates, -1, -1): + + name, candidate, nodeconf, curpath = object_trail[i] + if candidate is None: + continue + + # Try a "default" method on the current leaf. + if hasattr(candidate, "default"): + defhandler = candidate.default + if getattr(defhandler, 'exposed', False): + # Insert any extra _cp_config from the default handler. + conf = getattr(defhandler, "_cp_config", {}) + object_trail.insert(i+1, ["default", defhandler, conf, curpath]) + request.config = set_conf() + # See http://www.cherrypy.org/ticket/613 + request.is_index = path.endswith("/") + return defhandler, names[i:-1] + + # Uncomment the next line to restrict positional params to "default". + # if i < num_candidates - 2: continue + + # Try the current leaf. + if getattr(candidate, 'exposed', False): + request.config = set_conf() + if i == num_candidates: + # We found the extra ".index". Mark request so tools + # can redirect if path_info has no trailing slash. + request.is_index = True + else: + # We're not at an 'index' handler. Mark request so tools + # can redirect if path_info has NO trailing slash. + # Note that this also includes handlers which take + # positional parameters (virtual paths). + request.is_index = False + return candidate, names[i:-1] + + # We didn't find anything + request.config = set_conf() + return None, [] + + +class MethodDispatcher(Dispatcher): + """Additional dispatch based on cherrypy.request.method.upper(). + + Methods named GET, POST, etc will be called on an exposed class. + The method names must be all caps; the appropriate Allow header + will be output showing all capitalized method names as allowable + HTTP verbs. + + Note that the containing class must be exposed, not the methods. + """ + + def __call__(self, path_info): + """Set handler and config for the current request.""" + request = cherrypy.request + resource, vpath = self.find_handler(path_info) + + if resource: + # Set Allow header + avail = [m for m in dir(resource) if m.isupper()] + if "GET" in avail and "HEAD" not in avail: + avail.append("HEAD") + avail.sort() + cherrypy.response.headers['Allow'] = ", ".join(avail) + + # Find the subhandler + meth = request.method.upper() + func = getattr(resource, meth, None) + if func is None and meth == "HEAD": + func = getattr(resource, "GET", None) + if func: + # Decode any leftover %2F in the virtual_path atoms. 
+ vpath = [x.replace("%2F", "/") for x in vpath] + request.handler = LateParamPageHandler(func, *vpath) + else: + request.handler = cherrypy.HTTPError(405) + else: + request.handler = cherrypy.NotFound() + + +class RoutesDispatcher(object): + """A Routes based dispatcher for CherryPy.""" + + def __init__(self, full_result=False): + """ + Routes dispatcher + + Set full_result to True if you wish the controller + and the action to be passed on to the page handler + parameters. By default they won't be. + """ + import routes + self.full_result = full_result + self.controllers = {} + self.mapper = routes.Mapper() + self.mapper.controller_scan = self.controllers.keys + + def connect(self, name, route, controller, **kwargs): + self.controllers[name] = controller + self.mapper.connect(name, route, controller=name, **kwargs) + + def redirect(self, url): + raise cherrypy.HTTPRedirect(url) + + def __call__(self, path_info): + """Set handler and config for the current request.""" + func = self.find_handler(path_info) + if func: + cherrypy.request.handler = LateParamPageHandler(func) + else: + cherrypy.request.handler = cherrypy.NotFound() + + def find_handler(self, path_info): + """Find the right page handler, and set request.config.""" + import routes + + request = cherrypy.request + + config = routes.request_config() + config.mapper = self.mapper + if hasattr(cherrypy.request, 'wsgi_environ'): + config.environ = cherrypy.request.wsgi_environ + config.host = request.headers.get('Host', None) + config.protocol = request.scheme + config.redirect = self.redirect + + result = self.mapper.match(path_info) + + config.mapper_dict = result + params = {} + if result: + params = result.copy() + if not self.full_result: + params.pop('controller', None) + params.pop('action', None) + request.params.update(params) + + # Get config for the root object/path. + request.config = base = cherrypy.config.copy() + curpath = "" + + def merge(nodeconf): + if 'tools.staticdir.dir' in nodeconf: + nodeconf['tools.staticdir.section'] = curpath or "/" + base.update(nodeconf) + + app = request.app + root = app.root + if hasattr(root, "_cp_config"): + merge(root._cp_config) + if "/" in app.config: + merge(app.config["/"]) + + # Mix in values from app.config. + atoms = [x for x in path_info.split("/") if x] + if atoms: + last = atoms.pop() + else: + last = None + for atom in atoms: + curpath = "/".join((curpath, atom)) + if curpath in app.config: + merge(app.config[curpath]) + + handler = None + if result: + controller = result.get('controller', None) + controller = self.controllers.get(controller) + if controller: + # Get config from the controller. + if hasattr(controller, "_cp_config"): + merge(controller._cp_config) + + action = result.get('action', None) + if action is not None: + handler = getattr(controller, action, None) + # Get config from the handler + if hasattr(handler, "_cp_config"): + merge(handler._cp_config) + + # Do the last path atom here so it can + # override the controller's _cp_config. + if last: + curpath = "/".join((curpath, last)) + if curpath in app.config: + merge(app.config[curpath]) + + return handler + + +def XMLRPCDispatcher(next_dispatcher=Dispatcher()): + from cherrypy.lib import xmlrpc + def xmlrpc_dispatch(path_info): + path_info = xmlrpc.patched_path(path_info) + return next_dispatcher(path_info) + return xmlrpc_dispatch + + +def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, **domains): + """Select a different handler based on the Host header. 
+ + This can be useful when running multiple sites within one CP server. + It allows several domains to point to different parts of a single + website structure. For example: + + http://www.domain.example -> root + http://www.domain2.example -> root/domain2/ + http://www.domain2.example:443 -> root/secure + + can be accomplished via the following config: + + [/] + request.dispatch = cherrypy.dispatch.VirtualHost( + **{'www.domain2.example': '/domain2', + 'www.domain2.example:443': '/secure', + }) + + next_dispatcher: the next dispatcher object in the dispatch chain. + The VirtualHost dispatcher adds a prefix to the URL and calls + another dispatcher. Defaults to cherrypy.dispatch.Dispatcher(). + + use_x_forwarded_host: if True (the default), any "X-Forwarded-Host" + request header will be used instead of the "Host" header. This + is commonly added by HTTP servers (such as Apache) when proxying. + + **domains: a dict of {host header value: virtual prefix} pairs. + The incoming "Host" request header is looked up in this dict, + and, if a match is found, the corresponding "virtual prefix" + value will be prepended to the URL path before calling the + next dispatcher. Note that you often need separate entries + for "example.com" and "www.example.com". In addition, "Host" + headers may contain the port number. + """ + from cherrypy.lib import http + def vhost_dispatch(path_info): + header = cherrypy.request.headers.get + + domain = header('Host', '') + if use_x_forwarded_host: + domain = header("X-Forwarded-Host", domain) + + prefix = domains.get(domain, "") + if prefix: + path_info = http.urljoin(prefix, path_info) + + result = next_dispatcher(path_info) + + # Touch up staticdir config. See http://www.cherrypy.org/ticket/614. + section = cherrypy.request.config.get('tools.staticdir.section') + if section: + section = section[len(prefix):] + cherrypy.request.config['tools.staticdir.section'] = section + + return result + return vhost_dispatch + diff --git a/src/cherrypy/_cperror.py b/src/cherrypy/_cperror.py new file mode 100644 index 0000000000..506a6652d9 --- /dev/null +++ b/src/cherrypy/_cperror.py @@ -0,0 +1,384 @@ +"""Error classes for CherryPy.""" + +from cgi import escape as _escape +from sys import exc_info as _exc_info +from traceback import format_exception as _format_exception +from urlparse import urljoin as _urljoin +from cherrypy.lib import http as _http + + +class CherryPyException(Exception): + pass + + +class TimeoutError(CherryPyException): + """Exception raised when Response.timed_out is detected.""" + pass + + +class InternalRedirect(CherryPyException): + """Exception raised to switch to the handler for a different URL. + + Any request.params must be supplied in a query string. + """ + + def __init__(self, path): + import cherrypy + request = cherrypy.request + + self.query_string = "" + if "?" in path: + # Separate any params included in the path + path, self.query_string = path.split("?", 1) + + # Note that urljoin will "do the right thing" whether url is: + # 1. a URL relative to root (e.g. "/dummy") + # 2. a URL relative to the current path + # Note that any query string will be discarded. + path = _urljoin(request.path_info, path) + + # Set a 'path' member attribute so that code which traps this + # error can have access to it. + self.path = path + + CherryPyException.__init__(self, path, self.query_string) + + +class HTTPRedirect(CherryPyException): + """Exception raised when the request should be redirected. 
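+
+    For example (an illustrative sketch with made-up paths), a page
+    handler might raise:
+
+        raise cherrypy.HTTPRedirect("/other/page")
+        raise cherrypy.HTTPRedirect("http://www.example.com/", 301)
+
+    The first form sends a 303 (or 302 for HTTP/1.0 clients) to a URL
+    resolved against the current request; the second forces a permanent
+    301 redirect to an absolute URL.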
+ + The new URL must be passed as the first argument to the Exception, + e.g., HTTPRedirect(newUrl). Multiple URLs are allowed. If a URL is + absolute, it will be used as-is. If it is relative, it is assumed + to be relative to the current cherrypy.request.path_info. + """ + + def __init__(self, urls, status=None): + import cherrypy + request = cherrypy.request + + if isinstance(urls, basestring): + urls = [urls] + + abs_urls = [] + for url in urls: + # Note that urljoin will "do the right thing" whether url is: + # 1. a complete URL with host (e.g. "http://www.example.com/test") + # 2. a URL relative to root (e.g. "/dummy") + # 3. a URL relative to the current path + # Note that any query string in cherrypy.request is discarded. + url = _urljoin(cherrypy.url(), url) + abs_urls.append(url) + self.urls = abs_urls + + # RFC 2616 indicates a 301 response code fits our goal; however, + # browser support for 301 is quite messy. Do 302/303 instead. See + # http://ppewww.ph.gla.ac.uk/~flavell/www/post-redirect.html + if status is None: + if request.protocol >= (1, 1): + status = 303 + else: + status = 302 + else: + status = int(status) + if status < 300 or status > 399: + raise ValueError("status must be between 300 and 399.") + + self.status = status + CherryPyException.__init__(self, abs_urls, status) + + def set_response(self): + """Modify cherrypy.response status, headers, and body to represent self. + + CherryPy uses this internally, but you can also use it to create an + HTTPRedirect object and set its output without *raising* the exception. + """ + import cherrypy + response = cherrypy.response + response.status = status = self.status + + if status in (300, 301, 302, 303, 307): + response.headers['Content-Type'] = "text/html" + # "The ... URI SHOULD be given by the Location field + # in the response." + response.headers['Location'] = self.urls[0] + + # "Unless the request method was HEAD, the entity of the response + # SHOULD contain a short hypertext note with a hyperlink to the + # new URI(s)." + msg = {300: "This resource can be found at %s.", + 301: "This resource has permanently moved to %s.", + 302: "This resource resides temporarily at %s.", + 303: "This resource can be found at %s.", + 307: "This resource has moved temporarily to %s.", + }[status] + response.body = "
\n".join([msg % (u, u) for u in self.urls]) + # Previous code may have set C-L, so we have to reset it + # (allow finalize to set it). + response.headers.pop('Content-Length', None) + elif status == 304: + # Not Modified. + # "The response MUST include the following header fields: + # Date, unless its omission is required by section 14.18.1" + # The "Date" header should have been set in Response.__init__ + + # "...the response SHOULD NOT include other entity-headers." + for key in ('Allow', 'Content-Encoding', 'Content-Language', + 'Content-Length', 'Content-Location', 'Content-MD5', + 'Content-Range', 'Content-Type', 'Expires', + 'Last-Modified'): + if key in response.headers: + del response.headers[key] + + # "The 304 response MUST NOT contain a message-body." + response.body = None + # Previous code may have set C-L, so we have to reset it. + response.headers.pop('Content-Length', None) + elif status == 305: + # Use Proxy. + # self.urls[0] should be the URI of the proxy. + response.headers['Location'] = self.urls[0] + response.body = None + # Previous code may have set C-L, so we have to reset it. + response.headers.pop('Content-Length', None) + else: + raise ValueError("The %s status code is unknown." % status) + + def __call__(self): + """Use this exception as a request.handler (raise self).""" + raise self + + +def clean_headers(status): + """Remove any headers which should not apply to an error response.""" + import cherrypy + + response = cherrypy.response + + # Remove headers which applied to the original content, + # but do not apply to the error page. + respheaders = response.headers + for key in ["Accept-Ranges", "Age", "ETag", "Location", "Retry-After", + "Vary", "Content-Encoding", "Content-Length", "Expires", + "Content-Location", "Content-MD5", "Last-Modified"]: + if respheaders.has_key(key): + del respheaders[key] + + if status != 416: + # A server sending a response with status code 416 (Requested + # range not satisfiable) SHOULD include a Content-Range field + # with a byte-range-resp-spec of "*". The instance-length + # specifies the current length of the selected resource. + # A response with status code 206 (Partial Content) MUST NOT + # include a Content-Range field with a byte-range- resp-spec of "*". + if respheaders.has_key("Content-Range"): + del respheaders["Content-Range"] + + +class HTTPError(CherryPyException): + """ Exception used to return an HTTP error code (4xx-5xx) to the client. + This exception will automatically set the response status and body. + + A custom message (a long description to display in the browser) + can be provided in place of the default. + """ + + def __init__(self, status=500, message=None): + self.status = status = int(status) + if status < 400 or status > 599: + raise ValueError("status must be between 400 and 599.") + self.message = message + CherryPyException.__init__(self, status, message) + + def set_response(self): + """Modify cherrypy.response status, headers, and body to represent self. + + CherryPy uses this internally, but you can also use it to create an + HTTPError object and set its output without *raising* the exception. + """ + import cherrypy + + response = cherrypy.response + + clean_headers(self.status) + + # In all cases, finalize will be called after this method, + # so don't bother cleaning up response values here. 
+ response.status = self.status + tb = None + if cherrypy.request.show_tracebacks: + tb = format_exc() + response.headers['Content-Type'] = "text/html" + + content = self.get_error_page(self.status, traceback=tb, + message=self.message) + response.body = content + response.headers['Content-Length'] = len(content) + + _be_ie_unfriendly(self.status) + + def get_error_page(self, *args, **kwargs): + return get_error_page(*args, **kwargs) + + def __call__(self): + """Use this exception as a request.handler (raise self).""" + raise self + + +class NotFound(HTTPError): + """Exception raised when a URL could not be mapped to any handler (404).""" + + def __init__(self, path=None): + if path is None: + import cherrypy + path = cherrypy.request.script_name + cherrypy.request.path_info + self.args = (path,) + HTTPError.__init__(self, 404, "The path %r was not found." % path) + + +_HTTPErrorTemplate = ''' + + + + %(status)s + + + +

+    <body>
+        <h2>%(status)s</h2>
+        <p>%(message)s</p>
+        <pre id="traceback">%(traceback)s</pre>
+    <div id="powered_by">
+    <span>Powered by <a href="http://www.cherrypy.org">CherryPy %(version)s</a></span>
+    </div>
+    </body>
+</html>
+ + +''' + +def get_error_page(status, **kwargs): + """Return an HTML page, containing a pretty error response. + + status should be an int or a str. + kwargs will be interpolated into the page template. + """ + import cherrypy + + try: + code, reason, message = _http.valid_status(status) + except ValueError, x: + raise cherrypy.HTTPError(500, x.args[0]) + + # We can't use setdefault here, because some + # callers send None for kwarg values. + if kwargs.get('status') is None: + kwargs['status'] = "%s %s" % (code, reason) + if kwargs.get('message') is None: + kwargs['message'] = message + if kwargs.get('traceback') is None: + kwargs['traceback'] = '' + if kwargs.get('version') is None: + kwargs['version'] = cherrypy.__version__ + + for k, v in kwargs.iteritems(): + if v is None: + kwargs[k] = "" + else: + kwargs[k] = _escape(kwargs[k]) + + # Use a custom template or callable for the error page? + pages = cherrypy.request.error_page + error_page = pages.get(code) or pages.get('default') + if error_page: + try: + if callable(error_page): + return error_page(**kwargs) + else: + return file(error_page, 'rb').read() % kwargs + except: + e = _format_exception(*_exc_info())[-1] + m = kwargs['message'] + if m: + m += "
" + m += "In addition, the custom error page failed:\n
%s" % e + kwargs['message'] = m + + return _HTTPErrorTemplate % kwargs + + +_ie_friendly_error_sizes = { + 400: 512, 403: 256, 404: 512, 405: 256, + 406: 512, 408: 512, 409: 512, 410: 256, + 500: 512, 501: 512, 505: 512, + } + + +def _be_ie_unfriendly(status): + import cherrypy + response = cherrypy.response + + # For some statuses, Internet Explorer 5+ shows "friendly error + # messages" instead of our response.body if the body is smaller + # than a given size. Fix this by returning a body over that size + # (by adding whitespace). + # See http://support.microsoft.com/kb/q218155/ + s = _ie_friendly_error_sizes.get(status, 0) + if s: + s += 1 + # Since we are issuing an HTTP error status, we assume that + # the entity is short, and we should just collapse it. + content = response.collapse_body() + l = len(content) + if l and l < s: + # IN ADDITION: the response must be written to IE + # in one chunk or it will still get replaced! Bah. + content = content + (" " * (s - l)) + response.body = content + response.headers['Content-Length'] = len(content) + + +def format_exc(exc=None): + """Return exc (or sys.exc_info if None), formatted.""" + if exc is None: + exc = _exc_info() + if exc == (None, None, None): + return "" + import traceback + return "".join(traceback.format_exception(*exc)) + +def bare_error(extrabody=None): + """Produce status, headers, body for a critical error. + + Returns a triple without calling any other questionable functions, + so it should be as error-free as possible. Call it from an HTTP server + if you get errors outside of the request. + + If extrabody is None, a friendly but rather unhelpful error message + is set in the body. If extrabody is a string, it will be appended + as-is to the body. + """ + + # The whole point of this function is to be a last line-of-defense + # in handling errors. That is, it must not raise any errors itself; + # it cannot be allowed to fail. Therefore, don't add to it! + # In particular, don't call any other CP functions. + + body = "Unrecoverable error in the server." + if extrabody is not None: + body += "\n" + extrabody + + return ("500 Internal Server Error", + [('Content-Type', 'text/plain'), + ('Content-Length', str(len(body)))], + [body]) + + diff --git a/src/cherrypy/_cplogging.py b/src/cherrypy/_cplogging.py new file mode 100644 index 0000000000..12c93d3cd8 --- /dev/null +++ b/src/cherrypy/_cplogging.py @@ -0,0 +1,248 @@ +"""CherryPy logging.""" + +import datetime +import logging +# Silence the no-handlers "warning" (stderr write!) 
in stdlib logging +logging.Logger.manager.emittedNoHandlerWarning = 1 +logfmt = logging.Formatter("%(message)s") +import os +import rfc822 +import sys + +import cherrypy +from cherrypy import _cperror + + +class LogManager(object): + + appid = None + error_log = None + access_log = None + access_log_format = \ + '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"' + + def __init__(self, appid=None, logger_root="cherrypy"): + self.logger_root = logger_root + self.appid = appid + if appid is None: + self.error_log = logging.getLogger("%s.error" % logger_root) + self.access_log = logging.getLogger("%s.access" % logger_root) + else: + self.error_log = logging.getLogger("%s.error.%s" % (logger_root, appid)) + self.access_log = logging.getLogger("%s.access.%s" % (logger_root, appid)) + self.error_log.setLevel(logging.DEBUG) + self.access_log.setLevel(logging.INFO) + cherrypy.engine.subscribe('graceful', self.reopen_files) + + def reopen_files(self): + """Close and reopen all file handlers.""" + for log in (self.error_log, self.access_log): + for h in log.handlers: + if isinstance(h, logging.FileHandler): + h.acquire() + h.stream.close() + h.stream = open(h.baseFilename, h.mode) + h.release() + + def error(self, msg='', context='', severity=logging.INFO, traceback=False): + """Write to the error log. + + This is not just for errors! Applications may call this at any time + to log application-specific information. + """ + if traceback: + msg += _cperror.format_exc() + self.error_log.log(severity, ' '.join((self.time(), context, msg))) + + def __call__(self, *args, **kwargs): + """Write to the error log. + + This is not just for errors! Applications may call this at any time + to log application-specific information. + """ + return self.error(*args, **kwargs) + + def access(self): + """Write to the access log (in Apache/NCSA Combined Log format). + + Like Apache started doing in 2.0.46, non-printable and other special + characters in %r (and we expand that to all parts) are escaped using + \\xhh sequences, where hh stands for the hexadecimal representation + of the raw byte. Exceptions from this rule are " and \\, which are + escaped by prepending a backslash, and all whitespace characters, + which are written in their C-style notation (\\n, \\t, etc). + """ + request = cherrypy.request + remote = request.remote + response = cherrypy.response + outheaders = response.headers + inheaders = request.headers + + atoms = {'h': remote.name or remote.ip, + 'l': '-', + 'u': getattr(request, "login", None) or "-", + 't': self.time(), + 'r': request.request_line, + 's': response.status.split(" ", 1)[0], + 'b': outheaders.get('Content-Length', '') or "-", + 'f': inheaders.get('Referer', ''), + 'a': inheaders.get('User-Agent', ''), + } + for k, v in atoms.items(): + if isinstance(v, unicode): + v = v.encode('utf8') + elif not isinstance(v, str): + v = str(v) + # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc + # and backslash for us. All we have to do is strip the quotes. + v = repr(v)[1:-1] + # Escape double-quote. 
+ atoms[k] = v.replace('"', '\\"') + + try: + self.access_log.log(logging.INFO, self.access_log_format % atoms) + except: + self(traceback=True) + + def time(self): + """Return now() in Apache Common Log Format (no timezone).""" + now = datetime.datetime.now() + month = rfc822._monthnames[now.month - 1].capitalize() + return ('[%02d/%s/%04d:%02d:%02d:%02d]' % + (now.day, month, now.year, now.hour, now.minute, now.second)) + + def _get_builtin_handler(self, log, key): + for h in log.handlers: + if getattr(h, "_cpbuiltin", None) == key: + return h + + + # ------------------------- Screen handlers ------------------------- # + + def _set_screen_handler(self, log, enable, stream=None): + h = self._get_builtin_handler(log, "screen") + if enable: + if not h: + if stream is None: + stream=sys.stderr + h = logging.StreamHandler(stream) + h.setLevel(logging.DEBUG) + h.setFormatter(logfmt) + h._cpbuiltin = "screen" + log.addHandler(h) + elif h: + log.handlers.remove(h) + + def _get_screen(self): + h = self._get_builtin_handler + has_h = h(self.error_log, "screen") or h(self.access_log, "screen") + return bool(has_h) + + def _set_screen(self, newvalue): + self._set_screen_handler(self.error_log, newvalue, stream=sys.stderr) + self._set_screen_handler(self.access_log, newvalue) + screen = property(_get_screen, _set_screen, + doc="If True, error and access will print to stderr.") + + + # -------------------------- File handlers -------------------------- # + + def _add_builtin_file_handler(self, log, fname): + h = logging.FileHandler(fname) + h.setLevel(logging.DEBUG) + h.setFormatter(logfmt) + h._cpbuiltin = "file" + log.addHandler(h) + + def _set_file_handler(self, log, filename): + h = self._get_builtin_handler(log, "file") + if filename: + if h: + if h.baseFilename != os.path.abspath(filename): + h.close() + log.handlers.remove(h) + self._add_builtin_file_handler(log, filename) + else: + self._add_builtin_file_handler(log, filename) + else: + if h: + h.close() + log.handlers.remove(h) + + def _get_error_file(self): + h = self._get_builtin_handler(self.error_log, "file") + if h: + return h.baseFilename + return '' + def _set_error_file(self, newvalue): + self._set_file_handler(self.error_log, newvalue) + error_file = property(_get_error_file, _set_error_file, + doc="The filename for self.error_log.") + + def _get_access_file(self): + h = self._get_builtin_handler(self.access_log, "file") + if h: + return h.baseFilename + return '' + def _set_access_file(self, newvalue): + self._set_file_handler(self.access_log, newvalue) + access_file = property(_get_access_file, _set_access_file, + doc="The filename for self.access_log.") + + + # ------------------------- WSGI handlers ------------------------- # + + def _set_wsgi_handler(self, log, enable): + h = self._get_builtin_handler(log, "wsgi") + if enable: + if not h: + h = WSGIErrorHandler() + h.setLevel(logging.DEBUG) + h.setFormatter(logfmt) + h._cpbuiltin = "wsgi" + log.addHandler(h) + elif h: + log.handlers.remove(h) + + def _get_wsgi(self): + return bool(self._get_builtin_handler(self.error_log, "wsgi")) + + def _set_wsgi(self, newvalue): + self._set_wsgi_handler(self.error_log, newvalue) + wsgi = property(_get_wsgi, _set_wsgi, + doc="If True, error messages will be sent to wsgi.errors.") + + +class WSGIErrorHandler(logging.Handler): + "A handler class which writes logging records to environ['wsgi.errors']." 
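+
+    # Illustrative note (not part of the upstream source): this handler
+    # is normally enabled through the LogManager 'wsgi' property rather
+    # than attached by hand, e.g. application code might do:
+    #
+    #     cherrypy.log.wsgi = True      # send errors to environ['wsgi.errors']
+    #     cherrypy.log.screen = False   # optionally silence the stderr handler
+    #
+    # assuming cherrypy.log is the global LogManager instance.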
+ + def flush(self): + """Flushes the stream.""" + try: + stream = cherrypy.request.wsgi_environ.get('wsgi.errors') + except (AttributeError, KeyError): + pass + else: + stream.flush() + + def emit(self, record): + """Emit a record.""" + try: + stream = cherrypy.request.wsgi_environ.get('wsgi.errors') + except (AttributeError, KeyError): + pass + else: + try: + msg = self.format(record) + fs = "%s\n" + import types + if not hasattr(types, "UnicodeType"): #if no unicode support... + stream.write(fs % msg) + else: + try: + stream.write(fs % msg) + except UnicodeError: + stream.write(fs % msg.encode("UTF-8")) + self.flush() + except: + self.handleError(record) diff --git a/src/cherrypy/_cpmodpy.py b/src/cherrypy/_cpmodpy.py new file mode 100644 index 0000000000..991362a16e --- /dev/null +++ b/src/cherrypy/_cpmodpy.py @@ -0,0 +1,330 @@ +"""Native adapter for serving CherryPy via mod_python + +Basic usage: + +########################################## +# Application in a module called myapp.py +########################################## + +import cherrypy + +class Root: + @cherrypy.expose + def index(self): + return 'Hi there, Ho there, Hey there' + + +# We will use this method from the mod_python configuration +# as the entry point to our application +def setup_server(): + cherrypy.tree.mount(Root()) + cherrypy.config.update({'environment': 'production', + 'log.screen': False, + 'show_tracebacks': False}) + +########################################## +# mod_python settings for apache2 +# This should reside in your httpd.conf +# or a file that will be loaded at +# apache startup +########################################## + +# Start +DocumentRoot "/" +Listen 8080 +LoadModule python_module /usr/lib/apache2/modules/mod_python.so + + + PythonPath "sys.path+['/path/to/my/application']" + SetHandler python-program + PythonHandler cherrypy._cpmodpy::handler + PythonOption cherrypy.setup myapp::setup_server + PythonDebug On + +# End + +The actual path to your mod_python.so is dependent on your +environment. In this case we suppose a global mod_python +installation on a Linux distribution such as Ubuntu. + +We do set the PythonPath configuration setting so that +your application can be found by from the user running +the apache2 instance. Of course if your application +resides in the global site-package this won't be needed. + +Then restart apache2 and access http://127.0.0.1:8080 +""" + +import logging +import StringIO + +import cherrypy +from cherrypy._cperror import format_exc, bare_error +from cherrypy.lib import http + + + +# ------------------------------ Request-handling + + + +def setup(req): + from mod_python import apache + + # Run any setup function defined by a "PythonOption cherrypy.setup" directive. 
+ options = req.get_options() + if 'cherrypy.setup' in options: + atoms = options['cherrypy.setup'].split('::', 1) + if len(atoms) == 1: + mod = __import__(atoms[0], globals(), locals()) + else: + modname, fname = atoms + mod = __import__(modname, globals(), locals(), [fname]) + func = getattr(mod, fname) + func() + + cherrypy.config.update({'log.screen': False, + "tools.ignore_headers.on": True, + "tools.ignore_headers.headers": ['Range'], + }) + + engine = cherrypy.engine + if hasattr(engine, "signal_handler"): + engine.signal_handler.unsubscribe() + if hasattr(engine, "console_control_handler"): + engine.console_control_handler.unsubscribe() + engine.autoreload.unsubscribe() + cherrypy.server.unsubscribe() + + def _log(msg, level): + newlevel = apache.APLOG_ERR + if logging.DEBUG >= level: + newlevel = apache.APLOG_DEBUG + elif logging.INFO >= level: + newlevel = apache.APLOG_INFO + elif logging.WARNING >= level: + newlevel = apache.APLOG_WARNING + # On Windows, req.server is required or the msg will vanish. See + # http://www.modpython.org/pipermail/mod_python/2003-October/014291.html. + # Also, "When server is not specified...LogLevel does not apply..." + apache.log_error(msg, newlevel, req.server) + engine.subscribe('log', _log) + + engine.start() + + def cherrypy_cleanup(data): + engine.exit() + try: + # apache.register_cleanup wasn't available until 3.1.4. + apache.register_cleanup(cherrypy_cleanup) + except AttributeError: + req.server.register_cleanup(req, cherrypy_cleanup) + + +class _ReadOnlyRequest: + expose = ('read', 'readline', 'readlines') + def __init__(self, req): + for method in self.expose: + self.__dict__[method] = getattr(req, method) + + +recursive = False + +_isSetUp = False +def handler(req): + from mod_python import apache + try: + global _isSetUp + if not _isSetUp: + setup(req) + _isSetUp = True + + # Obtain a Request object from CherryPy + local = req.connection.local_addr + local = http.Host(local[0], local[1], req.connection.local_host or "") + remote = req.connection.remote_addr + remote = http.Host(remote[0], remote[1], req.connection.remote_host or "") + + scheme = req.parsed_uri[0] or 'http' + req.get_basic_auth_pw() + + try: + # apache.mpm_query only became available in mod_python 3.1 + q = apache.mpm_query + threaded = q(apache.AP_MPMQ_IS_THREADED) + forked = q(apache.AP_MPMQ_IS_FORKED) + except AttributeError: + bad_value = ("You must provide a PythonOption '%s', " + "either 'on' or 'off', when running a version " + "of mod_python < 3.1") + + threaded = options.get('multithread', '').lower() + if threaded == 'on': + threaded = True + elif threaded == 'off': + threaded = False + else: + raise ValueError(bad_value % "multithread") + + forked = options.get('multiprocess', '').lower() + if forked == 'on': + forked = True + elif forked == 'off': + forked = False + else: + raise ValueError(bad_value % "multiprocess") + + sn = cherrypy.tree.script_name(req.uri or "/") + if sn is None: + send_response(req, '404 Not Found', [], '') + else: + app = cherrypy.tree.apps[sn] + method = req.method + path = req.uri + qs = req.args or "" + reqproto = req.protocol + headers = req.headers_in.items() + rfile = _ReadOnlyRequest(req) + prev = None + + try: + redirections = [] + while True: + request, response = app.get_serving(local, remote, scheme, + "HTTP/1.1") + request.login = req.user + request.multithread = bool(threaded) + request.multiprocess = bool(forked) + request.app = app + request.prev = prev + + # Run the CherryPy Request object and obtain the response + try: 
+ request.run(method, path, qs, reqproto, headers, rfile) + break + except cherrypy.InternalRedirect, ir: + app.release_serving() + prev = request + + if not recursive: + if ir.path in redirections: + raise RuntimeError("InternalRedirector visited the " + "same URL twice: %r" % ir.path) + else: + # Add the *previous* path_info + qs to redirections. + if qs: + qs = "?" + qs + redirections.append(sn + path + qs) + + # Munge environment and try again. + method = "GET" + path = ir.path + qs = ir.query_string + rfile = StringIO.StringIO() + + send_response(req, response.status, response.header_list, + response.body, response.stream) + finally: + app.release_serving() + except: + tb = format_exc() + cherrypy.log(tb, 'MOD_PYTHON', severity=logging.ERROR) + s, h, b = bare_error() + send_response(req, s, h, b) + return apache.OK + + +def send_response(req, status, headers, body, stream=False): + # Set response status + req.status = int(status[:3]) + + # Set response headers + req.content_type = "text/plain" + for header, value in headers: + if header.lower() == 'content-type': + req.content_type = value + continue + req.headers_out.add(header, value) + + if stream: + # Flush now so the status and headers are sent immediately. + req.flush() + + # Set response body + if isinstance(body, basestring): + req.write(body) + else: + for seg in body: + req.write(seg) + + + +# --------------- Startup tools for CherryPy + mod_python --------------- # + + +import os +import re + + +def read_process(cmd, args=""): + pipein, pipeout = os.popen4("%s %s" % (cmd, args)) + try: + firstline = pipeout.readline() + if (re.search(r"(not recognized|No such file|not found)", firstline, + re.IGNORECASE)): + raise IOError('%s must be on your system path.' % cmd) + output = firstline + pipeout.read() + finally: + pipeout.close() + return output + + +class ModPythonServer(object): + + template = """ +# Apache2 server configuration file for running CherryPy with mod_python. 
+ +DocumentRoot "/" +Listen %(port)s +LoadModule python_module modules/mod_python.so + + + SetHandler python-program + PythonHandler %(handler)s + PythonDebug On +%(opts)s + +""" + + def __init__(self, loc="/", port=80, opts=None, apache_path="apache", + handler="cherrypy._cpmodpy::handler"): + self.loc = loc + self.port = port + self.opts = opts + self.apache_path = apache_path + self.handler = handler + + def start(self): + opts = "".join([" PythonOption %s %s\n" % (k, v) + for k, v in self.opts]) + conf_data = self.template % {"port": self.port, + "loc": self.loc, + "opts": opts, + "handler": self.handler, + } + + mpconf = os.path.join(os.path.dirname(__file__), "cpmodpy.conf") + f = open(mpconf, 'wb') + try: + f.write(conf_data) + finally: + f.close() + + response = read_process(self.apache_path, "-k start -f %s" % mpconf) + self.ready = True + return response + + def stop(self): + os.popen("apache -k stop") + self.ready = False + diff --git a/src/cherrypy/_cprequest.py b/src/cherrypy/_cprequest.py new file mode 100644 index 0000000000..9ec310c972 --- /dev/null +++ b/src/cherrypy/_cprequest.py @@ -0,0 +1,906 @@ + +import Cookie +import os +import sys +import time +import types + +import cherrypy +from cherrypy import _cpcgifs, _cpconfig +from cherrypy._cperror import format_exc, bare_error +from cherrypy.lib import http + + +class Hook(object): + """A callback and its metadata: failsafe, priority, and kwargs.""" + + __metaclass__ = cherrypy._AttributeDocstrings + + callback = None + callback__doc = """ + The bare callable that this Hook object is wrapping, which will + be called when the Hook is called.""" + + failsafe = False + failsafe__doc = """ + If True, the callback is guaranteed to run even if other callbacks + from the same call point raise exceptions.""" + + priority = 50 + priority__doc = """ + Defines the order of execution for a list of Hooks. 
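+        For example (an illustrative note), Hook(cb, priority=10) runs
+        before a Hook left at the default priority of 50.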
Priority numbers + should be limited to the closed interval [0, 100], but values outside + this range are acceptable, as are fractional values.""" + + kwargs = {} + kwargs__doc = """ + A set of keyword arguments that will be passed to the + callable on each call.""" + + def __init__(self, callback, failsafe=None, priority=None, **kwargs): + self.callback = callback + + if failsafe is None: + failsafe = getattr(callback, "failsafe", False) + self.failsafe = failsafe + + if priority is None: + priority = getattr(callback, "priority", 50) + self.priority = priority + + self.kwargs = kwargs + + def __cmp__(self, other): + return cmp(self.priority, other.priority) + + def __call__(self): + """Run self.callback(**self.kwargs).""" + return self.callback(**self.kwargs) + + def __repr__(self): + cls = self.__class__ + return ("%s.%s(callback=%r, failsafe=%r, priority=%r, %s)" + % (cls.__module__, cls.__name__, self.callback, + self.failsafe, self.priority, + ", ".join(['%s=%r' % (k, v) + for k, v in self.kwargs.iteritems()]))) + + +class HookMap(dict): + """A map of call points to lists of callbacks (Hook objects).""" + + def __new__(cls, points=None): + d = dict.__new__(cls) + for p in points or []: + d[p] = [] + return d + + def __init__(self, *a, **kw): + pass + + def attach(self, point, callback, failsafe=None, priority=None, **kwargs): + """Append a new Hook made from the supplied arguments.""" + self[point].append(Hook(callback, failsafe, priority, **kwargs)) + + def run(self, point): + """Execute all registered Hooks (callbacks) for the given point.""" + exc = None + hooks = self[point] + hooks.sort() + for hook in hooks: + # Some hooks are guaranteed to run even if others at + # the same hookpoint fail. We will still log the failure, + # but proceed on to the next hook. The only way + # to stop all processing from one of these hooks is + # to raise SystemExit and stop the whole server. + if exc is None or hook.failsafe: + try: + hook() + except (KeyboardInterrupt, SystemExit): + raise + except (cherrypy.HTTPError, cherrypy.HTTPRedirect, + cherrypy.InternalRedirect): + exc = sys.exc_info()[1] + except: + exc = sys.exc_info()[1] + cherrypy.log(traceback=True, severity=40) + if exc: + raise + + def __copy__(self): + newmap = self.__class__() + # We can't just use 'update' because we want copies of the + # mutable values (each is a list) as well. + for k, v in self.iteritems(): + newmap[k] = v[:] + return newmap + copy = __copy__ + + def __repr__(self): + cls = self.__class__ + return "%s.%s(points=%r)" % (cls.__module__, cls.__name__, self.keys()) + + +# Config namespace handlers + +def hooks_namespace(k, v): + """Attach bare hooks declared in config.""" + # Use split again to allow multiple hooks for a single + # hookpoint per path (e.g. "hooks.before_handler.1"). 
+ # Little-known fact you only get from reading source ;) + hookpoint = k.split(".", 1)[0] + if isinstance(v, basestring): + v = cherrypy.lib.attributes(v) + if not isinstance(v, Hook): + v = Hook(v) + cherrypy.request.hooks[hookpoint].append(v) + +def request_namespace(k, v): + """Attach request attributes declared in config.""" + setattr(cherrypy.request, k, v) + +def response_namespace(k, v): + """Attach response attributes declared in config.""" + setattr(cherrypy.response, k, v) + +def error_page_namespace(k, v): + """Attach error pages declared in config.""" + if k != 'default': + k = int(k) + cherrypy.request.error_page[k] = v + + +hookpoints = ['on_start_resource', 'before_request_body', + 'before_handler', 'before_finalize', + 'on_end_resource', 'on_end_request', + 'before_error_response', 'after_error_response'] + + +class Request(object): + """An HTTP request. + + This object represents the metadata of an HTTP request message; + that is, it contains attributes which describe the environment + in which the request URL, headers, and body were sent (if you + want tools to interpret the headers and body, those are elsewhere, + mostly in Tools). This 'metadata' consists of socket data, + transport characteristics, and the Request-Line. This object + also contains data regarding the configuration in effect for + the given URL, and the execution plan for generating a response. + """ + + __metaclass__ = cherrypy._AttributeDocstrings + + prev = None + prev__doc = """ + The previous Request object (if any). This should be None + unless we are processing an InternalRedirect.""" + + # Conversation/connection attributes + local = http.Host("127.0.0.1", 80) + local__doc = \ + "An http.Host(ip, port, hostname) object for the server socket." + + remote = http.Host("127.0.0.1", 1111) + remote__doc = \ + "An http.Host(ip, port, hostname) object for the client socket." + + scheme = "http" + scheme__doc = """ + The protocol used between client and server. In most cases, + this will be either 'http' or 'https'.""" + + server_protocol = "HTTP/1.1" + server_protocol__doc = """ + The HTTP version for which the HTTP server is at least + conditionally compliant.""" + + base = "" + base__doc = """The (scheme://host) portion of the requested URL.""" + + # Request-Line attributes + request_line = "" + request_line__doc = """ + The complete Request-Line received from the client. This is a + single string consisting of the request method, URI, and protocol + version (joined by spaces). Any final CRLF is removed.""" + + method = "GET" + method__doc = """ + Indicates the HTTP method to be performed on the resource identified + by the Request-URI. Common methods include GET, HEAD, POST, PUT, and + DELETE. CherryPy allows any extension method; however, various HTTP + servers and gateways may restrict the set of allowable methods. + CherryPy applications SHOULD restrict the set (on a per-URI basis).""" + + query_string = "" + query_string__doc = """ + The query component of the Request-URI, a string of information to be + interpreted by the resource. The query portion of a URI follows the + path component, and is separated by a '?'. For example, the URI + 'http://www.cherrypy.org/wiki?a=3&b=4' has the query component, + 'a=3&b=4'.""" + + protocol = (1, 1) + protocol__doc = """The HTTP protocol version corresponding to the set + of features which should be allowed in the response. If BOTH + the client's request message AND the server's level of HTTP + compliance is HTTP/1.1, this attribute will be the tuple (1, 1). 
+ If either is 1.0, this attribute will be the tuple (1, 0). + Lower HTTP protocol versions are not explicitly supported.""" + + params = {} + params__doc = """ + A dict which combines query string (GET) and request entity (POST) + variables. This is populated in two stages: GET params are added + before the 'on_start_resource' hook, and POST params are added + between the 'before_request_body' and 'before_handler' hooks.""" + + # Message attributes + header_list = [] + header_list__doc = """ + A list of the HTTP request headers as (name, value) tuples. + In general, you should use request.headers (a dict) instead.""" + + headers = http.HeaderMap() + headers__doc = """ + A dict-like object containing the request headers. Keys are header + names (in Title-Case format); however, you may get and set them in + a case-insensitive manner. That is, headers['Content-Type'] and + headers['content-type'] refer to the same value. Values are header + values (decoded according to RFC 2047 if necessary). See also: + http.HeaderMap, http.HeaderElement.""" + + cookie = Cookie.SimpleCookie() + cookie__doc = """See help(Cookie).""" + + rfile = None + rfile__doc = """ + If the request included an entity (body), it will be available + as a stream in this attribute. However, the rfile will normally + be read for you between the 'before_request_body' hook and the + 'before_handler' hook, and the resulting string is placed into + either request.params or the request.body attribute. + + You may disable the automatic consumption of the rfile by setting + request.process_request_body to False, either in config for the desired + path, or in an 'on_start_resource' or 'before_request_body' hook. + + WARNING: In almost every case, you should not attempt to read from the + rfile stream after CherryPy's automatic mechanism has read it. If you + turn off the automatic parsing of rfile, you should read exactly the + number of bytes specified in request.headers['Content-Length']. + Ignoring either of these warnings may result in a hung request thread + or in corruption of the next (pipelined) request. + """ + + process_request_body = True + process_request_body__doc = """ + If True, the rfile (if any) is automatically read and parsed, + and the result placed into request.params or request.body.""" + + methods_with_bodies = ("POST", "PUT") + methods_with_bodies__doc = """ + A sequence of HTTP methods for which CherryPy will automatically + attempt to read a body from the rfile.""" + + body = None + body__doc = """ + If the request Content-Type is 'application/x-www-form-urlencoded' + or multipart, this will be None. Otherwise, this will contain the + request entity body as a string; this value is set between the + 'before_request_body' and 'before_handler' hooks (assuming that + process_request_body is True).""" + + body_params = None + body_params__doc = """ + If the request Content-Type is 'application/x-www-form-urlencoded' or + multipart, this will be a dict of the params pulled from the entity + body; that is, it will be the portion of request.params that come + from the message body (sometimes called "POST params", although they + can be sent with various HTTP method verbs). 
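+        For example (an illustrative case), a urlencoded POST body of
+        'a=1&b=2' would yield body_params == {'a': '1', 'b': '2'}.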
This value is set between + the 'before_request_body' and 'before_handler' hooks (assuming that + process_request_body is True).""" + + # Dispatch attributes + dispatch = cherrypy.dispatch.Dispatcher() + dispatch__doc = """ + The object which looks up the 'page handler' callable and collects + config for the current request based on the path_info, other + request attributes, and the application architecture. The core + calls the dispatcher as early as possible, passing it a 'path_info' + argument. + + The default dispatcher discovers the page handler by matching path_info + to a hierarchical arrangement of objects, starting at request.app.root. + See help(cherrypy.dispatch) for more information.""" + + script_name = "" + script_name__doc = """ + The 'mount point' of the application which is handling this request. + + This attribute MUST NOT end in a slash. If the script_name refers to + the root of the URI, it MUST be an empty string (not "/"). + """ + + path_info = "/" + path_info__doc = """ + The 'relative path' portion of the Request-URI. This is relative + to the script_name ('mount point') of the application which is + handling this request.""" + + login = None + login__doc = """ + When authentication is used during the request processing this is + set to 'False' if it failed and to the 'username' value if it succeeded. + The default 'None' implies that no authentication happened.""" + + # Note that cherrypy.url uses "if request.app:" to determine whether + # the call is during a real HTTP request or not. So leave this None. + app = None + app__doc = \ + """The cherrypy.Application object which is handling this request.""" + + handler = None + handler__doc = """ + The function, method, or other callable which CherryPy will call to + produce the response. The discovery of the handler and the arguments + it will receive are determined by the request.dispatch object. + By default, the handler is discovered by walking a tree of objects + starting at request.app.root, and is then passed all HTTP params + (from the query string and POST body) as keyword arguments.""" + + toolmaps = {} + toolmaps__doc = """ + A nested dict of all Toolboxes and Tools in effect for this request, + of the form: {Toolbox.namespace: {Tool.name: config dict}}.""" + + config = None + config__doc = """ + A flat dict of all configuration entries which apply to the + current request. These entries are collected from global config, + application config (based on request.path_info), and from handler + config (exactly how is governed by the request.dispatch object in + effect for this request; by default, handler config can be attached + anywhere in the tree between request.app.root and the final handler, + and inherits downward).""" + + is_index = None + is_index__doc = """ + This will be True if the current request is mapped to an 'index' + resource handler (also, a 'default' handler if path_info ends with + a slash). The value may be used to automatically redirect the + user-agent to a 'more canonical' URL which either adds or removes + the trailing slash. See cherrypy.tools.trailing_slash.""" + + hooks = HookMap(hookpoints) + hooks__doc = """ + A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}. + Each key is a str naming the hook point, and each value is a list + of hooks which will be called at that hook point during this request. + The list of hooks is generally populated as early as possible (mostly + from Tools specified in config), but may be extended at any time. 
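+        For instance (an illustrative sketch with a hypothetical callable),
+        a per-request hook can be added with:
+
+            cherrypy.request.hooks.attach('before_finalize', add_headers)
+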
+ See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools.""" + + error_response = cherrypy.HTTPError(500).set_response + error_response__doc = """ + The no-arg callable which will handle unexpected, untrapped errors + during request processing. This is not used for expected exceptions + (like NotFound, HTTPError, or HTTPRedirect) which are raised in + response to expected conditions (those should be customized either + via request.error_page or by overriding HTTPError.set_response). + By default, error_response uses HTTPError(500) to return a generic + error response to the user-agent.""" + + error_page = {} + error_page__doc = """ + A dict of {error code: response filename or callable} pairs. + + The error code must be an int representing a given HTTP error code, + or the string 'default', which will be used if no matching entry + is found for a given numeric code. + + If a filename is provided, the file should contain a Python string- + formatting template, and can expect by default to receive format + values with the mapping keys %(status)s, %(message)s, %(traceback)s, + and %(version)s. The set of format mappings can be extended by + overriding HTTPError.set_response. + + If a callable is provided, it will be called by default with keyword + arguments 'status', 'message', 'traceback', and 'version', as for a + string-formatting template. The callable must return a string which + will be set to response.body. It may also override headers or perform + any other processing. + + If no entry is given for an error code, and no 'default' entry exists, + a default template will be used. + """ + + show_tracebacks = True + show_tracebacks__doc = """ + If True, unexpected errors encountered during request processing will + include a traceback in the response body.""" + + throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect) + throws__doc = \ + """The sequence of exceptions which Request.run does not trap.""" + + throw_errors = False + throw_errors__doc = """ + If True, Request.run will not trap any errors (except HTTPRedirect and + HTTPError, which are more properly called 'exceptions', not errors).""" + + closed = False + closed__doc = """ + True once the close method has been called, False otherwise.""" + + stage = None + stage__doc = """ + A string containing the stage reached in the request-handling process. + This is useful when debugging a live server with hung requests.""" + + namespaces = _cpconfig.NamespaceSet( + **{"hooks": hooks_namespace, + "request": request_namespace, + "response": response_namespace, + "error_page": error_page_namespace, + "tools": cherrypy.tools, + }) + + def __init__(self, local_host, remote_host, scheme="http", + server_protocol="HTTP/1.1"): + """Populate a new Request object. + + local_host should be an http.Host object with the server info. + remote_host should be an http.Host object with the client info. + scheme should be a string, either "http" or "https". + """ + self.local = local_host + self.remote = remote_host + self.scheme = scheme + self.server_protocol = server_protocol + + self.closed = False + + # Put a *copy* of the class error_page into self. + self.error_page = self.error_page.copy() + + # Put a *copy* of the class namespaces into self. + self.namespaces = self.namespaces.copy() + + self.stage = None + + def close(self): + """Run cleanup code. 
(Core)""" + if not self.closed: + self.closed = True + self.stage = 'on_end_request' + self.hooks.run('on_end_request') + self.stage = 'close' + + def run(self, method, path, query_string, req_protocol, headers, rfile): + """Process the Request. (Core) + + method, path, query_string, and req_protocol should be pulled directly + from the Request-Line (e.g. "GET /path?key=val HTTP/1.0"). + path should be %XX-unquoted, but query_string should not be. + headers should be a list of (name, value) tuples. + rfile should be a file-like object containing the HTTP request entity. + + When run() is done, the returned object should have 3 attributes: + status, e.g. "200 OK" + header_list, a list of (name, value) tuples + body, an iterable yielding strings + + Consumer code (HTTP servers) should then access these response + attributes to build the outbound stream. + + """ + self.stage = 'run' + try: + self.error_response = cherrypy.HTTPError(500).set_response + + self.method = method + path = path or "/" + self.query_string = query_string or '' + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + rp = int(req_protocol[5]), int(req_protocol[7]) + sp = int(self.server_protocol[5]), int(self.server_protocol[7]) + self.protocol = min(rp, sp) + + # Rebuild first line of the request (e.g. "GET /path HTTP/1.0"). + url = path + if query_string: + url += '?' + query_string + self.request_line = '%s %s %s' % (method, url, req_protocol) + + self.header_list = list(headers) + self.rfile = rfile + self.headers = http.HeaderMap() + self.cookie = Cookie.SimpleCookie() + self.handler = None + + # path_info should be the path from the + # app root (script_name) to the handler. + self.script_name = self.app.script_name + self.path_info = pi = path[len(self.script_name):] + + self.stage = 'respond' + self.respond(pi) + + except self.throws: + raise + except: + if self.throw_errors: + raise + else: + # Failure in setup, error handler or finalize. Bypass them. + # Can't use handle_error because we may not have hooks yet. + cherrypy.log(traceback=True, severity=40) + if self.show_tracebacks: + body = format_exc() + else: + body = "" + r = bare_error(body) + response = cherrypy.response + response.status, response.header_list, response.body = r + + if self.method == "HEAD": + # HEAD requests MUST NOT return a message-body in the response. + cherrypy.response.body = [] + + cherrypy.log.access() + + if cherrypy.response.timed_out: + raise cherrypy.TimeoutError() + + return cherrypy.response + + def respond(self, path_info): + """Generate a response for the resource at self.path_info. (Core)""" + try: + try: + try: + if self.app is None: + raise cherrypy.NotFound() + + # Get the 'Host' header, so we can HTTPRedirect properly. 
+ self.stage = 'process_headers' + self.process_headers() + + # Make a copy of the class hooks + self.hooks = self.__class__.hooks.copy() + self.toolmaps = {} + self.stage = 'get_resource' + self.get_resource(path_info) + self.namespaces(self.config) + + self.stage = 'on_start_resource' + self.hooks.run('on_start_resource') + + if self.process_request_body: + if self.method not in self.methods_with_bodies: + self.process_request_body = False + + self.stage = 'before_request_body' + self.hooks.run('before_request_body') + if self.process_request_body: + self.process_body() + + self.stage = 'before_handler' + self.hooks.run('before_handler') + if self.handler: + self.stage = 'handler' + cherrypy.response.body = self.handler() + + self.stage = 'before_finalize' + self.hooks.run('before_finalize') + cherrypy.response.finalize() + except (cherrypy.HTTPRedirect, cherrypy.HTTPError), inst: + inst.set_response() + self.stage = 'before_finalize (HTTPError)' + self.hooks.run('before_finalize') + cherrypy.response.finalize() + finally: + self.stage = 'on_end_resource' + self.hooks.run('on_end_resource') + except self.throws: + raise + except: + if self.throw_errors: + raise + self.handle_error() + + def process_headers(self): + """Parse HTTP header data into Python structures. (Core)""" + self.params = http.parse_query_string(self.query_string) + + # Process the headers into self.headers + headers = self.headers + for name, value in self.header_list: + # Call title() now (and use dict.__method__(headers)) + # so title doesn't have to be called twice. + name = name.title() + value = value.strip() + + # Warning: if there is more than one header entry for cookies (AFAIK, + # only Konqueror does that), only the last one will remain in headers + # (but they will be correctly stored in request.cookie). + if "=?" in value: + dict.__setitem__(headers, name, http.decode_TEXT(value)) + else: + dict.__setitem__(headers, name, value) + + # Handle cookies differently because on Konqueror, multiple + # cookies come on different lines with the same key + if name == 'Cookie': + self.cookie.load(value) + + if not dict.__contains__(headers, 'Host'): + # All Internet-based HTTP/1.1 servers MUST respond with a 400 + # (Bad Request) status code to any HTTP/1.1 request message + # which lacks a Host header field. + if self.protocol >= (1, 1): + msg = "HTTP/1.1 requires a 'Host' request header." + raise cherrypy.HTTPError(400, msg) + host = dict.get(headers, 'Host') + if not host: + host = self.local.name or self.local.ip + self.base = "%s://%s" % (self.scheme, host) + + def get_resource(self, path): + """Call a dispatcher (which sets self.handler and .config). (Core)""" + dispatch = self.dispatch + # First, see if there is a custom dispatch at this URI. Custom + # dispatchers can only be specified in app.config, not in _cp_config + # (since custom dispatchers may not even have an app.root). + trail = path or "/" + while trail: + nodeconf = self.app.config.get(trail, {}) + + d = nodeconf.get("request.dispatch") + if d: + dispatch = d + break + + lastslash = trail.rfind("/") + if lastslash == -1: + break + elif lastslash == 0 and trail != "/": + trail = "/" + else: + trail = trail[:lastslash] + + # dispatch() should set self.handler and self.config + dispatch(path) + + def process_body(self): + """Convert request.rfile into request.params (or request.body). (Core)""" + if not self.headers.get("Content-Length", ""): + # No Content-Length header supplied (or it's 0). 
+ # If we went ahead and called cgi.FieldStorage, it would hang, + # since it cannot determine when to stop reading from the socket. + # See http://www.cherrypy.org/ticket/493. + # See also http://www.cherrypy.org/ticket/650. + # Note also that we expect any HTTP server to have decoded + # any message-body that had a transfer-coding, and we expect + # the HTTP server to have supplied a Content-Length header + # which is valid for the decoded entity-body. + raise cherrypy.HTTPError(411) + + # If the headers are missing "Content-Type" then add one + # with an empty value. This ensures that FieldStorage + # won't parse the request body for params if the client + # didn't provide a "Content-Type" header. + if 'Content-Type' not in self.headers: + h = http.HeaderMap(self.headers.items()) + h['Content-Type'] = '' + else: + h = self.headers + + try: + forms = _cpcgifs.FieldStorage(fp=self.rfile, + headers=h, + # FieldStorage only recognizes POST. + environ={'REQUEST_METHOD': "POST"}, + keep_blank_values=1) + except Exception, e: + if e.__class__.__name__ == 'MaxSizeExceeded': + # Post data is too big + raise cherrypy.HTTPError(413) + else: + raise + + # Note that, if headers['Content-Type'] is multipart/*, + # then forms.file will not exist; instead, each form[key] + # item will be its own file object, and will be handled + # by params_from_CGI_form. + if forms.file: + # request body was a content-type other than form params. + self.body = forms.file + else: + self.body_params = p = http.params_from_CGI_form(forms) + self.params.update(p) + + def handle_error(self): + """Handle the last unanticipated exception. (Core)""" + try: + self.hooks.run("before_error_response") + if self.error_response: + self.error_response() + self.hooks.run("after_error_response") + cherrypy.response.finalize() + except cherrypy.HTTPRedirect, inst: + inst.set_response() + cherrypy.response.finalize() + + +def file_generator(input, chunkSize=65536): + """Yield the given input (a file object) in chunks (default 64k). (Core)""" + chunk = input.read(chunkSize) + while chunk: + yield chunk + chunk = input.read(chunkSize) + input.close() + + +class Body(object): + """The body of the HTTP response (the response entity).""" + + def __get__(self, obj, objclass=None): + if obj is None: + # When calling on the class instead of an instance... + return self + else: + return obj._body + + def __set__(self, obj, value): + # Convert the given value to an iterable object. + if isinstance(value, basestring): + # strings get wrapped in a list because iterating over a single + # item list is much faster than iterating over every character + # in a long string. + if value: + value = [value] + else: + # [''] doesn't evaluate to False, so replace it with []. + value = [] + elif isinstance(value, types.FileType): + value = file_generator(value) + elif value is None: + value = [] + obj._body = value + + +class Response(object): + """An HTTP Response, including status, headers, and body. + + Application developers should use Response.headers (a dict) to + set or modify HTTP response headers. When the response is finalized, + Response.headers is transformed into Response.header_list as + (key, value) tuples. + """ + + __metaclass__ = cherrypy._AttributeDocstrings + + # Class attributes for dev-time introspection. + status = "" + status__doc = """The HTTP Status-Code and Reason-Phrase.""" + + header_list = [] + header_list__doc = """ + A list of the HTTP response headers as (name, value) tuples. 
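+    For example (the values shown here are illustrative only):
+    [("Content-Type", "text/html"), ("Server", "CherryPy/3.x"),
+    ("Content-Length", "11")].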
+ In general, you should use response.headers (a dict) instead.""" + + headers = http.HeaderMap() + headers__doc = """ + A dict-like object containing the response headers. Keys are header + names (in Title-Case format); however, you may get and set them in + a case-insensitive manner. That is, headers['Content-Type'] and + headers['content-type'] refer to the same value. Values are header + values (decoded according to RFC 2047 if necessary). See also: + http.HeaderMap, http.HeaderElement.""" + + cookie = Cookie.SimpleCookie() + cookie__doc = """See help(Cookie).""" + + body = Body() + body__doc = """The body (entity) of the HTTP response.""" + + time = None + time__doc = """The value of time.time() when created. Use in HTTP dates.""" + + timeout = 300 + timeout__doc = """Seconds after which the response will be aborted.""" + + timed_out = False + timed_out__doc = """ + Flag to indicate the response should be aborted, because it has + exceeded its timeout.""" + + stream = False + stream__doc = """If False, buffer the response body.""" + + def __init__(self): + self.status = None + self.header_list = None + self._body = [] + self.time = time.time() + + self.headers = http.HeaderMap() + # Since we know all our keys are titled strings, we can + # bypass HeaderMap.update and get a big speed boost. + dict.update(self.headers, { + "Content-Type": 'text/html', + "Server": "CherryPy/" + cherrypy.__version__, + "Date": http.HTTPDate(self.time), + }) + self.cookie = Cookie.SimpleCookie() + + def collapse_body(self): + """Collapse self.body to a single string; replace it and return it.""" + newbody = ''.join([chunk for chunk in self.body]) + self.body = newbody + return newbody + + def finalize(self): + """Transform headers (and cookies) into self.header_list. (Core)""" + try: + code, reason, _ = http.valid_status(self.status) + except ValueError, x: + raise cherrypy.HTTPError(500, x.args[0]) + + self.status = "%s %s" % (code, reason) + + headers = self.headers + if self.stream: + if dict.get(headers, 'Content-Length') is None: + dict.pop(headers, 'Content-Length', None) + elif code < 200 or code in (204, 205, 304): + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." + dict.pop(headers, 'Content-Length', None) + self.body = "" + else: + # Responses which are not streamed should have a Content-Length, + # but allow user code to set Content-Length if desired. + if dict.get(headers, 'Content-Length') is None: + content = self.collapse_body() + dict.__setitem__(headers, 'Content-Length', len(content)) + + # Transform our header dict into a list of tuples. + self.header_list = h = headers.output(cherrypy.request.protocol) + + cookie = self.cookie.output() + if cookie: + for line in cookie.split("\n"): + if line.endswith("\r"): + # Python 2.4 emits cookies joined by LF but 2.5+ by CRLF. + line = line[:-1] + name, value = line.split(": ", 1) + h.append((name, value)) + + def check_timeout(self): + """If now > self.time + self.timeout, set self.timed_out. + + This purposefully sets a flag, rather than raising an error, + so that a monitor thread can interrupt the Response thread. 
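+
+        A rough sketch of how the two halves cooperate (the monitor loop is
+        illustrative; only check_timeout, timed_out and cherrypy.TimeoutError
+        come from this code):
+
+            # in a monitor thread, for each in-flight response:
+            response.check_timeout()        # sets response.timed_out if expired
+
+            # in the request thread, at the end of Request.run():
+            if cherrypy.response.timed_out:
+                raise cherrypy.TimeoutError()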
+ """ + if time.time() > self.time + self.timeout: + self.timed_out = True + + + diff --git a/src/cherrypy/_cpserver.py b/src/cherrypy/_cpserver.py new file mode 100644 index 0000000000..53259cb086 --- /dev/null +++ b/src/cherrypy/_cpserver.py @@ -0,0 +1,117 @@ +"""Manage HTTP servers with CherryPy.""" + +import warnings + +import cherrypy +from cherrypy.lib import attributes + +# We import * because we want to export check_port +# et al as attributes of this module. +from cherrypy.process.servers import * + + +class Server(ServerAdapter): + """An adapter for an HTTP server. + + You can set attributes (like socket_host and socket_port) + on *this* object (which is probably cherrypy.server), and call + quickstart. For example: + + cherrypy.server.socket_port = 80 + cherrypy.quickstart() + """ + + socket_port = 8080 + + _socket_host = '127.0.0.1' + def _get_socket_host(self): + return self._socket_host + def _set_socket_host(self, value): + if not value: + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' instead to listen on all active " + "interfaces (INADDR_ANY).") + self._socket_host = value + socket_host = property(_get_socket_host, _set_socket_host, + doc="""The hostname or IP address on which to listen for connections. + + Host values may be any IPv4 or IPv6 address, or any valid hostname. + The string 'localhost' is a synonym for '127.0.0.1' (or '::1', if + your hosts file prefers IPv6). The string '0.0.0.0' is a special + IPv4 entry meaning "any active interface" (INADDR_ANY), and '::' + is the similar IN6ADDR_ANY for IPv6. The empty string or None are + not allowed.""") + + socket_file = '' + socket_queue_size = 5 + socket_timeout = 10 + shutdown_timeout = 5 + protocol_version = 'HTTP/1.1' + reverse_dns = False + thread_pool = 10 + max_request_header_size = 500 * 1024 + max_request_body_size = 100 * 1024 * 1024 + instance = None + ssl_certificate = None + ssl_private_key = None + nodelay = True + + def __init__(self): + ServerAdapter.__init__(self, cherrypy.engine) + + def quickstart(self, server=None): + """This does nothing now and will be removed in 3.2.""" + warnings.warn('quickstart does nothing now and will be removed in ' + '3.2. Call cherrypy.engine.start() instead.', + DeprecationWarning) + + def httpserver_from_self(self, httpserver=None): + """Return a (httpserver, bind_addr) pair based on self attributes.""" + if httpserver is None: + httpserver = self.instance + if httpserver is None: + from cherrypy import _cpwsgi_server + httpserver = _cpwsgi_server.CPWSGIServer() + if isinstance(httpserver, basestring): + httpserver = attributes(httpserver)() + + if self.socket_file: + return httpserver, self.socket_file + + host = self.socket_host + port = self.socket_port + return httpserver, (host, port) + + def start(self): + """Start the HTTP server.""" + if not self.httpserver: + self.httpserver, self.bind_addr = self.httpserver_from_self() + ServerAdapter.start(self) + start.priority = 75 + + def base(self): + """Return the base (scheme://host) for this server.""" + if self.socket_file: + return self.socket_file + + host = self.socket_host + if host in ('0.0.0.0', '::'): + # 0.0.0.0 is INADDR_ANY and :: is IN6ADDR_ANY. + # Look up the host name, which should be the + # safest thing to spit out in a URL. 
+ import socket + host = socket.gethostname() + + port = self.socket_port + + if self.ssl_certificate: + scheme = "https" + if port != 443: + host += ":%s" % port + else: + scheme = "http" + if port != 80: + host += ":%s" % port + + return "%s://%s" % (scheme, host) + diff --git a/src/cherrypy/_cpthreadinglocal.py b/src/cherrypy/_cpthreadinglocal.py new file mode 100644 index 0000000000..34c17ac41a --- /dev/null +++ b/src/cherrypy/_cpthreadinglocal.py @@ -0,0 +1,239 @@ +# This is a backport of Python-2.4's threading.local() implementation + +"""Thread-local objects + +(Note that this module provides a Python version of thread + threading.local class. Depending on the version of Python you're + using, there may be a faster one available. You should always import + the local class from threading.) + +Thread-local objects support the management of thread-local data. +If you have data that you want to be local to a thread, simply create +a thread-local object and use its attributes: + + >>> mydata = local() + >>> mydata.number = 42 + >>> mydata.number + 42 + +You can also access the local-object's dictionary: + + >>> mydata.__dict__ + {'number': 42} + >>> mydata.__dict__.setdefault('widgets', []) + [] + >>> mydata.widgets + [] + +What's important about thread-local objects is that their data are +local to a thread. If we access the data in a different thread: + + >>> log = [] + >>> def f(): + ... items = mydata.__dict__.items() + ... items.sort() + ... log.append(items) + ... mydata.number = 11 + ... log.append(mydata.number) + + >>> import threading + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[], 11] + +we get different data. Furthermore, changes made in the other thread +don't affect data seen in this thread: + + >>> mydata.number + 42 + +Of course, values you get from a local object, including a __dict__ +attribute, are for whatever thread was current at the time the +attribute was read. For that reason, you generally don't want to save +these values across threads, as they apply only to the thread they +came from. + +You can create custom local objects by subclassing the local class: + + >>> class MyLocal(local): + ... number = 2 + ... initialized = False + ... def __init__(self, **kw): + ... if self.initialized: + ... raise SystemError('__init__ called too many times') + ... self.initialized = True + ... self.__dict__.update(kw) + ... def squared(self): + ... return self.number ** 2 + +This can be useful to support default values, methods and +initialization. Note that if you define an __init__ method, it will be +called each time the local object is used in a separate thread. This +is necessary to initialize each thread's dictionary. + +Now if we create a local object: + + >>> mydata = MyLocal(color='red') + +Now we have a default number: + + >>> mydata.number + 2 + +an initial color: + + >>> mydata.color + 'red' + >>> del mydata.color + +And a method that operates on the data: + + >>> mydata.squared() + 4 + +As before, we can access the data in a separate thread: + + >>> log = [] + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[('color', 'red'), ('initialized', True)], 11] + +without affecting this thread's data: + + >>> mydata.number + 2 + >>> mydata.color + Traceback (most recent call last): + ... + AttributeError: 'MyLocal' object has no attribute 'color' + +Note that subclasses can define slots, but they are not thread +local. 
They are shared across threads: + + >>> class MyLocal(local): + ... __slots__ = 'number' + + >>> mydata = MyLocal() + >>> mydata.number = 42 + >>> mydata.color = 'red' + +So, the separate thread: + + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + +affects what we see: + + >>> mydata.number + 11 + +>>> del mydata +""" + +# Threading import is at end + +class _localbase(object): + __slots__ = '_local__key', '_local__args', '_local__lock' + + def __new__(cls, *args, **kw): + self = object.__new__(cls) + key = 'thread.local.' + str(id(self)) + object.__setattr__(self, '_local__key', key) + object.__setattr__(self, '_local__args', (args, kw)) + object.__setattr__(self, '_local__lock', RLock()) + + if args or kw and (cls.__init__ is object.__init__): + raise TypeError("Initialization arguments are not supported") + + # We need to create the thread dict in anticipation of + # __init__ being called, to make sure we don't call it + # again ourselves. + dict = object.__getattribute__(self, '__dict__') + currentThread().__dict__[key] = dict + + return self + +def _patch(self): + key = object.__getattribute__(self, '_local__key') + d = currentThread().__dict__.get(key) + if d is None: + d = {} + currentThread().__dict__[key] = d + object.__setattr__(self, '__dict__', d) + + # we have a new instance dict, so call out __init__ if we have + # one + cls = type(self) + if cls.__init__ is not object.__init__: + args, kw = object.__getattribute__(self, '_local__args') + cls.__init__(self, *args, **kw) + else: + object.__setattr__(self, '__dict__', d) + +class local(_localbase): + + def __getattribute__(self, name): + lock = object.__getattribute__(self, '_local__lock') + lock.acquire() + try: + _patch(self) + return object.__getattribute__(self, name) + finally: + lock.release() + + def __setattr__(self, name, value): + lock = object.__getattribute__(self, '_local__lock') + lock.acquire() + try: + _patch(self) + return object.__setattr__(self, name, value) + finally: + lock.release() + + def __delattr__(self, name): + lock = object.__getattribute__(self, '_local__lock') + lock.acquire() + try: + _patch(self) + return object.__delattr__(self, name) + finally: + lock.release() + + + def __del__(): + threading_enumerate = enumerate + __getattribute__ = object.__getattribute__ + + def __del__(self): + key = __getattribute__(self, '_local__key') + + try: + threads = list(threading_enumerate()) + except: + # if enumerate fails, as it seems to do during + # shutdown, we'll skip cleanup under the assumption + # that there is nothing to clean up + return + + for thread in threads: + try: + __dict__ = thread.__dict__ + except AttributeError: + # Thread is dying, rest in peace + continue + + if key in __dict__: + try: + del __dict__[key] + except KeyError: + pass # didn't have anything in this thread + + return __del__ + __del__ = __del__() + +from threading import currentThread, enumerate, RLock diff --git a/src/cherrypy/_cptools.py b/src/cherrypy/_cptools.py new file mode 100644 index 0000000000..930ddab277 --- /dev/null +++ b/src/cherrypy/_cptools.py @@ -0,0 +1,492 @@ +"""CherryPy tools. A "tool" is any helper, adapted to CP. + +Tools are usually designed to be used in a variety of ways (although some +may only offer one if they choose): + + Library calls: + All tools are callables that can be used wherever needed. + The arguments are straightforward and should be detailed within the + docstring. 
+ + Function decorators: + All tools, when called, may be used as decorators which configure + individual CherryPy page handlers (methods on the CherryPy tree). + That is, "@tools.anytool()" should "turn on" the tool via the + decorated function's _cp_config attribute. + + CherryPy config: + If a tool exposes a "_setup" callable, it will be called + once per Request (if the feature is "turned on" via config). + +Tools may be implemented as any object with a namespace. The builtins +are generally either modules or instances of the tools.Tool class. +""" + +import cherrypy + + +def _getargs(func): + """Return the names of all static arguments to the given function.""" + # Use this instead of importing inspect for less mem overhead. + import types + if isinstance(func, types.MethodType): + func = func.im_func + co = func.func_code + return co.co_varnames[:co.co_argcount] + + +class Tool(object): + """A registered function for use with CherryPy request-processing hooks. + + help(tool.callable) should give you more information about this Tool. + """ + + namespace = "tools" + + def __init__(self, point, callable, name=None, priority=50): + self._point = point + self.callable = callable + self._name = name + self._priority = priority + self.__doc__ = self.callable.__doc__ + self._setargs() + + def _setargs(self): + """Copy func parameter names to obj attributes.""" + try: + for arg in _getargs(self.callable): + setattr(self, arg, None) + except (TypeError, AttributeError): + if hasattr(self.callable, "__call__"): + for arg in _getargs(self.callable.__call__): + setattr(self, arg, None) + # IronPython 1.0 raises NotImplementedError because + # inspect.getargspec tries to access Python bytecode + # in co_code attribute. + except NotImplementedError: + pass + # IronPython 1B1 may raise IndexError in some cases, + # but if we trap it here it doesn't prevent CP from working. + except IndexError: + pass + + def _merged_args(self, d=None): + """Return a dict of configuration entries for this Tool.""" + if d: + conf = d.copy() + else: + conf = {} + + tm = cherrypy.request.toolmaps[self.namespace] + if self._name in tm: + conf.update(tm[self._name]) + + if "on" in conf: + del conf["on"] + + return conf + + def __call__(self, *args, **kwargs): + """Compile-time decorator (turn on the tool in config). + + For example: + + @tools.proxy() + def whats_my_base(self): + return cherrypy.request.base + whats_my_base.exposed = True + """ + if args: + raise TypeError("The %r Tool does not accept positional " + "arguments; you must use keyword arguments." + % self._name) + def tool_decorator(f): + if not hasattr(f, "_cp_config"): + f._cp_config = {} + subspace = self.namespace + "." + self._name + "." + f._cp_config[subspace + "on"] = True + for k, v in kwargs.iteritems(): + f._cp_config[subspace + k] = v + return f + return tool_decorator + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + conf = self._merged_args() + p = conf.pop("priority", None) + if p is None: + p = getattr(self.callable, "priority", self._priority) + cherrypy.request.hooks.attach(self._point, self.callable, + priority=p, **conf) + + +class HandlerTool(Tool): + """Tool which is called 'before main', that may skip normal handlers. + + If the tool successfully handles the request (by setting response.body), + if should return True. This will cause CherryPy to skip any 'normal' page + handler. 
If the tool did not handle the request, it should return False + to tell CherryPy to continue on and call the normal page handler. If the + tool is declared AS a page handler (see the 'handler' method), returning + False will raise NotFound. + """ + + def __init__(self, callable, name=None): + Tool.__init__(self, 'before_handler', callable, name) + + def handler(self, *args, **kwargs): + """Use this tool as a CherryPy page handler. + + For example: + class Root: + nav = tools.staticdir.handler(section="/nav", dir="nav", + root=absDir) + """ + def handle_func(*a, **kw): + handled = self.callable(*args, **self._merged_args(kwargs)) + if not handled: + raise cherrypy.NotFound() + return cherrypy.response.body + handle_func.exposed = True + return handle_func + + def _wrapper(self, **kwargs): + if self.callable(**kwargs): + cherrypy.request.handler = None + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + conf = self._merged_args() + p = conf.pop("priority", None) + if p is None: + p = getattr(self.callable, "priority", self._priority) + cherrypy.request.hooks.attach(self._point, self._wrapper, + priority=p, **conf) + + +class HandlerWrapperTool(Tool): + """Tool which wraps request.handler in a provided wrapper function. + + The 'newhandler' arg must be a handler wrapper function that takes a + 'next_handler' argument, plus *args and **kwargs. Like all page handler + functions, it must return an iterable for use as cherrypy.response.body. + + For example, to allow your 'inner' page handlers to return dicts + which then get interpolated into a template: + + def interpolator(next_handler, *args, **kwargs): + filename = cherrypy.request.config.get('template') + cherrypy.response.template = env.get_template(filename) + response_dict = next_handler(*args, **kwargs) + return cherrypy.response.template.render(**response_dict) + cherrypy.tools.jinja = HandlerWrapperTool(interpolator) + """ + + def __init__(self, newhandler, point='before_handler', name=None, priority=50): + self.newhandler = newhandler + self._point = point + self._name = name + self._priority = priority + + def callable(self): + innerfunc = cherrypy.request.handler + def wrap(*args, **kwargs): + return self.newhandler(innerfunc, *args, **kwargs) + cherrypy.request.handler = wrap + + +class ErrorTool(Tool): + """Tool which is used to replace the default request.error_response.""" + + def __init__(self, callable, name=None): + Tool.__init__(self, None, callable, name) + + def _wrapper(self): + self.callable(**self._merged_args()) + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + cherrypy.request.error_response = self._wrapper + + +# Builtin tools # + +from cherrypy.lib import cptools, encoding, auth, static, tidy +from cherrypy.lib import sessions as _sessions, xmlrpc as _xmlrpc +from cherrypy.lib import caching as _caching, wsgiapp as _wsgiapp + + +class SessionTool(Tool): + """Session Tool for CherryPy. + + sessions.locking: + When 'implicit' (the default), the session will be locked for you, + just before running the page handler. + When 'early', the session will be locked before reading the request + body. 
This is off by default for safety reasons; for example, + a large upload would block the session, denying an AJAX + progress meter (see http://www.cherrypy.org/ticket/630). + When 'explicit' (or any other value), you need to call + cherrypy.session.acquire_lock() yourself before using + session data. + """ + + def __init__(self): + # _sessions.init must be bound after headers are read + Tool.__init__(self, 'before_request_body', _sessions.init) + + def _lock_session(self): + cherrypy.serving.session.acquire_lock() + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + hooks = cherrypy.request.hooks + + conf = self._merged_args() + + p = conf.pop("priority", None) + if p is None: + p = getattr(self.callable, "priority", self._priority) + + hooks.attach(self._point, self.callable, priority=p, **conf) + + locking = conf.pop('locking', 'implicit') + if locking == 'implicit': + hooks.attach('before_handler', self._lock_session) + elif locking == 'early': + # Lock before the request body (but after _sessions.init runs!) + hooks.attach('before_request_body', self._lock_session, + priority=60) + else: + # Don't lock + pass + + hooks.attach('before_finalize', _sessions.save) + hooks.attach('on_end_request', _sessions.close) + + def regenerate(self): + """Drop the current session and make a new one (with a new id).""" + sess = cherrypy.serving.session + sess.regenerate() + + # Grab cookie-relevant tool args + conf = dict([(k, v) for k, v in self._merged_args().iteritems() + if k in ('path', 'path_header', 'name', 'timeout', + 'domain', 'secure')]) + _sessions.set_response_cookie(**conf) + + + + +class XMLRPCController(object): + """A Controller (page handler collection) for XML-RPC. + + To use it, have your controllers subclass this base class (it will + turn on the tool for you). + + You can also supply the following optional config entries: + + tools.xmlrpc.encoding: 'utf-8' + tools.xmlrpc.allow_none: 0 + + XML-RPC is a rather discontinuous layer over HTTP; dispatching to the + appropriate handler must first be performed according to the URL, and + then a second dispatch step must take place according to the RPC method + specified in the request body. It also allows a superfluous "/RPC2" + prefix in the URL, supplies its own handler args in the body, and + requires a 200 OK "Fault" response instead of 404 when the desired + method is not found. + + Therefore, XML-RPC cannot be implemented for CherryPy via a Tool alone. + This Controller acts as the dispatch target for the first half (based + on the URL); it then reads the RPC method from the request body and + does its own second dispatch step based on that method. It also reads + body params, and returns a Fault on error. + + The XMLRPCDispatcher strips any /RPC2 prefix; if you aren't using /RPC2 + in your URL's, you can safely skip turning on the XMLRPCDispatcher. + Otherwise, you need to use declare it in config: + + request.dispatch: cherrypy.dispatch.XMLRPCDispatcher() + """ + + # Note we're hard-coding this into the 'tools' namespace. We could do + # a huge amount of work to make it relocatable, but the only reason why + # would be if someone actually disabled the default_toolbox. Meh. 
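+    #
+    # A minimal subclass sketch (RPCRoot and its add method are illustrative
+    # names, not part of this module):
+    #
+    #     class RPCRoot(XMLRPCController):
+    #         def add(self, a, b):
+    #             return a + b
+    #         add.exposed = True
+    #
+    #     cherrypy.tree.mount(RPCRoot(), '/RPC2')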
+ _cp_config = {'tools.xmlrpc.on': True} + + def default(self, *vpath, **params): + rpcparams, rpcmethod = _xmlrpc.process_body() + + subhandler = self + for attr in str(rpcmethod).split('.'): + subhandler = getattr(subhandler, attr, None) + + if subhandler and getattr(subhandler, "exposed", False): + body = subhandler(*(vpath + rpcparams), **params) + + else: + # http://www.cherrypy.org/ticket/533 + # if a method is not found, an xmlrpclib.Fault should be returned + # raising an exception here will do that; see + # cherrypy.lib.xmlrpc.on_error + raise Exception, 'method "%s" is not supported' % attr + + conf = cherrypy.request.toolmaps['tools'].get("xmlrpc", {}) + _xmlrpc.respond(body, + conf.get('encoding', 'utf-8'), + conf.get('allow_none', 0)) + return cherrypy.response.body + default.exposed = True + + +class WSGIAppTool(HandlerTool): + """A tool for running any WSGI middleware/application within CP. + + Here are the parameters: + + wsgi_app - any wsgi application callable + env_update - a dictionary with arbitrary keys and values to be + merged with the WSGI environ dictionary. + + Example: + + class Whatever: + _cp_config = {'tools.wsgiapp.on': True, + 'tools.wsgiapp.app': some_app, + 'tools.wsgiapp.env': app_environ, + } + """ + + def _setup(self): + # Keep request body intact so the wsgi app can have its way with it. + cherrypy.request.process_request_body = False + HandlerTool._setup(self) + + +class SessionAuthTool(HandlerTool): + + def _setargs(self): + for name in dir(cptools.SessionAuth): + if not name.startswith("__"): + setattr(self, name, None) + + +class CachingTool(Tool): + """Caching Tool for CherryPy.""" + + def _wrapper(self, invalid_methods=("POST", "PUT", "DELETE"), **kwargs): + request = cherrypy.request + + if not hasattr(cherrypy, "_cache"): + # Make a process-wide Cache object. + cherrypy._cache = kwargs.pop("cache_class", _caching.MemoryCache)() + + # Take all remaining kwargs and set them on the Cache object. + for k, v in kwargs.iteritems(): + setattr(cherrypy._cache, k, v) + + if _caching.get(invalid_methods=invalid_methods): + request.handler = None + else: + if request.cacheable: + # Note the devious technique here of adding hooks on the fly + request.hooks.attach('before_finalize', _caching.tee_output, + priority = 90) + _wrapper.priority = 20 + + def _setup(self): + """Hook caching into cherrypy.request.""" + conf = self._merged_args() + + p = conf.pop("priority", None) + cherrypy.request.hooks.attach('before_handler', self._wrapper, + priority=p, **conf) + + + +class Toolbox(object): + """A collection of Tools. + + This object also functions as a config namespace handler for itself. + Custom toolboxes should be added to each Application's toolboxes dict. + """ + + def __init__(self, namespace): + self.namespace = namespace + + def __setattr__(self, name, value): + # If the Tool._name is None, supply it from the attribute name. 
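+        # For example, "toolbox.gzip = Tool('before_finalize', encoding.gzip)"
+        # ends up named "gzip", so its config keys read "tools.gzip.on", etc.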
+ if isinstance(value, Tool): + if value._name is None: + value._name = name + value.namespace = self.namespace + object.__setattr__(self, name, value) + + def __enter__(self): + """Populate request.toolmaps from tools specified in config.""" + cherrypy.request.toolmaps[self.namespace] = map = {} + def populate(k, v): + toolname, arg = k.split(".", 1) + bucket = map.setdefault(toolname, {}) + bucket[arg] = v + return populate + + def __exit__(self, exc_type, exc_val, exc_tb): + """Run tool._setup() for each tool in our toolmap.""" + map = cherrypy.request.toolmaps.get(self.namespace) + if map: + for name, settings in map.items(): + if settings.get("on", False): + tool = getattr(self, name) + tool._setup() + + +default_toolbox = _d = Toolbox("tools") +_d.session_auth = SessionAuthTool(cptools.session_auth) +_d.proxy = Tool('before_request_body', cptools.proxy, priority=30) +_d.response_headers = Tool('on_start_resource', cptools.response_headers) +_d.log_tracebacks = Tool('before_error_response', cptools.log_traceback) +_d.log_headers = Tool('before_error_response', cptools.log_request_headers) +_d.log_hooks = Tool('on_end_request', cptools.log_hooks, priority=100) +_d.err_redirect = ErrorTool(cptools.redirect) +_d.etags = Tool('before_finalize', cptools.validate_etags) +_d.decode = Tool('before_handler', encoding.decode) +# the order of encoding, gzip, caching is important +_d.encode = Tool('before_finalize', encoding.encode, priority=70) +_d.gzip = Tool('before_finalize', encoding.gzip, priority=80) +_d.staticdir = HandlerTool(static.staticdir) +_d.staticfile = HandlerTool(static.staticfile) +_d.sessions = SessionTool() +_d.xmlrpc = ErrorTool(_xmlrpc.on_error) +_d.wsgiapp = WSGIAppTool(_wsgiapp.run) +_d.caching = CachingTool('before_handler', _caching.get, 'caching') +_d.expires = Tool('before_finalize', _caching.expires) +_d.tidy = Tool('before_finalize', tidy.tidy) +_d.nsgmls = Tool('before_finalize', tidy.nsgmls) +_d.ignore_headers = Tool('before_request_body', cptools.ignore_headers) +_d.referer = Tool('before_request_body', cptools.referer) +_d.basic_auth = Tool('on_start_resource', auth.basic_auth) +_d.digest_auth = Tool('on_start_resource', auth.digest_auth) +_d.trailing_slash = Tool('before_handler', cptools.trailing_slash, priority=60) +_d.flatten = Tool('before_finalize', cptools.flatten) +_d.accept = Tool('on_start_resource', cptools.accept) +_d.redirect = Tool('on_start_resource', cptools.redirect) + +del _d, cptools, encoding, auth, static, tidy diff --git a/src/cherrypy/_cptree.py b/src/cherrypy/_cptree.py new file mode 100644 index 0000000000..3dcd4a47e5 --- /dev/null +++ b/src/cherrypy/_cptree.py @@ -0,0 +1,234 @@ +"""CherryPy Application and Tree objects.""" + +import os +import cherrypy +from cherrypy import _cpconfig, _cplogging, _cprequest, _cpwsgi, tools +from cherrypy.lib import http as _http + + +class Application(object): + """A CherryPy Application. + + Servers and gateways should not instantiate Request objects directly. + Instead, they should ask an Application object for a request object. + + An instance of this class may also be used as a WSGI callable + (WSGI application object) for itself. + """ + + __metaclass__ = cherrypy._AttributeDocstrings + + root = None + root__doc = """ + The top-most container of page handlers for this app. Handlers should + be arranged in a hierarchy of attributes, matching the expected URI + hierarchy; the default dispatcher then searches this hierarchy for a + matching handler. 
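+
+    A small sketch of such a hierarchy (Root, Blog and their methods are
+    illustrative names only):
+
+        class Blog(object):
+            def index(self):
+                return "blog index"
+            index.exposed = True
+
+        class Root(object):
+            blog = Blog()              # serves /blog/
+            def index(self):
+                return "front page"
+            index.exposed = True
+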
When using a dispatcher other than the default, + this value may be None.""" + + config = {} + config__doc = """ + A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict + of {key: value} pairs.""" + + namespaces = _cpconfig.NamespaceSet() + toolboxes = {'tools': cherrypy.tools} + + log = None + log__doc = """A LogManager instance. See _cplogging.""" + + wsgiapp = None + wsgiapp__doc = """A CPWSGIApp instance. See _cpwsgi.""" + + request_class = _cprequest.Request + response_class = _cprequest.Response + + relative_urls = False + + def __init__(self, root, script_name="", config=None): + self.log = _cplogging.LogManager(id(self), cherrypy.log.logger_root) + self.root = root + self.script_name = script_name + self.wsgiapp = _cpwsgi.CPWSGIApp(self) + + self.namespaces = self.namespaces.copy() + self.namespaces["log"] = lambda k, v: setattr(self.log, k, v) + self.namespaces["wsgi"] = self.wsgiapp.namespace_handler + + self.config = self.__class__.config.copy() + if config: + self.merge(config) + + def __repr__(self): + return "%s.%s(%r, %r)" % (self.__module__, self.__class__.__name__, + self.root, self.script_name) + + script_name__doc = """ + The URI "mount point" for this app. A mount point is that portion of + the URI which is constant for all URIs that are serviced by this + application; it does not include scheme, host, or proxy ("virtual host") + portions of the URI. + + For example, if script_name is "/my/cool/app", then the URL + "http://www.example.com/my/cool/app/page1" might be handled by a + "page1" method on the root object. + + The value of script_name MUST NOT end in a slash. If the script_name + refers to the root of the URI, it MUST be an empty string (not "/"). + + If script_name is explicitly set to None, then the script_name will be + provided for each call from request.wsgi_environ['SCRIPT_NAME']. + """ + def _get_script_name(self): + if self._script_name is None: + # None signals that the script name should be pulled from WSGI environ. + return cherrypy.request.wsgi_environ['SCRIPT_NAME'].rstrip("/") + return self._script_name + def _set_script_name(self, value): + if value: + value = value.rstrip("/") + self._script_name = value + script_name = property(fget=_get_script_name, fset=_set_script_name, + doc=script_name__doc) + + def merge(self, config): + """Merge the given config into self.config.""" + _cpconfig.merge(self.config, config) + + # Handle namespaces specified in config. + self.namespaces(self.config.get("/", {})) + + def get_serving(self, local, remote, scheme, sproto): + """Create and return a Request and Response object.""" + req = self.request_class(local, remote, scheme, sproto) + req.app = self + + for name, toolbox in self.toolboxes.iteritems(): + req.namespaces[name] = toolbox + + resp = self.response_class() + cherrypy.serving.load(req, resp) + cherrypy.engine.timeout_monitor.acquire() + cherrypy.engine.publish('acquire_thread') + + return req, resp + + def release_serving(self): + """Release the current serving (request and response).""" + req = cherrypy.serving.request + + cherrypy.engine.timeout_monitor.release() + + try: + req.close() + except: + cherrypy.log(traceback=True, severity=40) + + cherrypy.serving.clear() + + def __call__(self, environ, start_response): + return self.wsgiapp(environ, start_response) + + +class Tree(object): + """A registry of CherryPy applications, mounted at diverse points. 
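+
+    A short usage sketch (Root and BlogRoot are illustrative names):
+
+        cherrypy.tree.mount(Root(), "")            # application at the site root
+        cherrypy.tree.mount(BlogRoot(), "/blog")   # second application at /blog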
+ + An instance of this class may also be used as a WSGI callable + (WSGI application object), in which case it dispatches to all + mounted apps. + """ + + apps = {} + apps__doc = """ + A dict of the form {script name: application}, where "script name" + is a string declaring the URI mount point (no trailing slash), and + "application" is an instance of cherrypy.Application (or an arbitrary + WSGI callable if you happen to be using a WSGI server).""" + + def __init__(self): + self.apps = {} + + def mount(self, root, script_name="", config=None): + """Mount a new app from a root object, script_name, and config. + + root: an instance of a "controller class" (a collection of page + handler methods) which represents the root of the application. + script_name: a string containing the "mount point" of the application. + This should start with a slash, and be the path portion of the + URL at which to mount the given root. For example, if root.index() + will handle requests to "http://www.example.com:8080/dept/app1/", + then the script_name argument would be "/dept/app1". + + It MUST NOT end in a slash. If the script_name refers to the + root of the URI, it MUST be an empty string (not "/"). + config: a file or dict containing application config. + """ + # Next line both 1) strips trailing slash and 2) maps "/" -> "". + script_name = script_name.rstrip("/") + + if isinstance(root, Application): + app = root + else: + app = Application(root, script_name) + + # If mounted at "", add favicon.ico + if script_name == "" and root and not hasattr(root, "favicon_ico"): + favicon = os.path.join(os.getcwd(), os.path.dirname(__file__), + "favicon.ico") + root.favicon_ico = tools.staticfile.handler(favicon) + + if config: + app.merge(config) + + self.apps[script_name] = app + + return app + + def graft(self, wsgi_callable, script_name=""): + """Mount a wsgi callable at the given script_name.""" + # Next line both 1) strips trailing slash and 2) maps "/" -> "". + script_name = script_name.rstrip("/") + self.apps[script_name] = wsgi_callable + + def script_name(self, path=None): + """The script_name of the app at the given path, or None. + + If path is None, cherrypy.request is used. + """ + + if path is None: + try: + path = _http.urljoin(cherrypy.request.script_name, + cherrypy.request.path_info) + except AttributeError: + return None + + while True: + if path in self.apps: + return path + + if path == "": + return None + + # Move one node up the tree and try again. + path = path[:path.rfind("/")] + + def __call__(self, environ, start_response): + # If you're calling this, then you're probably setting SCRIPT_NAME + # to '' (some WSGI servers always set SCRIPT_NAME to ''). + # Try to look up the app using the full path. + path = _http.urljoin(environ.get('SCRIPT_NAME', ''), + environ.get('PATH_INFO', '')) + sn = self.script_name(path or "/") + if sn is None: + start_response('404 Not Found', []) + return [] + + app = self.apps[sn] + + # Correct the SCRIPT_NAME and PATH_INFO environ entries. 
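+        # For example, a request for "/blog/post/1" dispatched to an app
+        # mounted at "/blog" goes on with SCRIPT_NAME "/blog" and
+        # PATH_INFO "/post/1".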
+ environ = environ.copy() + environ['SCRIPT_NAME'] = sn + environ['PATH_INFO'] = path[len(sn.rstrip("/")):] + return app(environ, start_response) + diff --git a/src/cherrypy/_cpwsgi.py b/src/cherrypy/_cpwsgi.py new file mode 100644 index 0000000000..b23822e568 --- /dev/null +++ b/src/cherrypy/_cpwsgi.py @@ -0,0 +1,330 @@ +"""WSGI interface (see PEP 333).""" + +import StringIO as _StringIO +import sys as _sys + +import cherrypy as _cherrypy +from cherrypy import _cperror +from cherrypy.lib import http as _http + + +class VirtualHost(object): + """Select a different WSGI application based on the Host header. + + This can be useful when running multiple sites within one CP server. + It allows several domains to point to different applications. For example: + + root = Root() + RootApp = cherrypy.Application(root) + Domain2App = cherrypy.Application(root) + SecureApp = cherrypy.Application(Secure()) + + vhost = cherrypy._cpwsgi.VirtualHost(RootApp, + domains={'www.domain2.example': Domain2App, + 'www.domain2.example:443': SecureApp, + }) + + cherrypy.tree.graft(vhost) + + default: required. The default WSGI application. + + use_x_forwarded_host: if True (the default), any "X-Forwarded-Host" + request header will be used instead of the "Host" header. This + is commonly added by HTTP servers (such as Apache) when proxying. + + domains: a dict of {host header value: application} pairs. + The incoming "Host" request header is looked up in this dict, + and, if a match is found, the corresponding WSGI application + will be called instead of the default. Note that you often need + separate entries for "example.com" and "www.example.com". + In addition, "Host" headers may contain the port number. + """ + + def __init__(self, default, domains=None, use_x_forwarded_host=True): + self.default = default + self.domains = domains or {} + self.use_x_forwarded_host = use_x_forwarded_host + + def __call__(self, environ, start_response): + domain = environ.get('HTTP_HOST', '') + if self.use_x_forwarded_host: + domain = environ.get("HTTP_X_FORWARDED_HOST", domain) + + nextapp = self.domains.get(domain) + if nextapp is None: + nextapp = self.default + return nextapp(environ, start_response) + + + +# WSGI-to-CP Adapter # + + +class AppResponse(object): + + throws = (KeyboardInterrupt, SystemExit) + request = None + + def __init__(self, environ, start_response, cpapp, recursive=False): + self.redirections = [] + self.recursive = recursive + self.environ = environ + self.start_response = start_response + self.cpapp = cpapp + self.setapp() + + def setapp(self): + try: + self.request = self.get_request() + s, h, b = self.get_response() + self.iter_response = iter(b) + self.start_response(s, h) + except self.throws: + self.close() + raise + except _cherrypy.InternalRedirect, ir: + self.environ['cherrypy.previous_request'] = _cherrypy.serving.request + self.close() + self.iredirect(ir.path, ir.query_string) + return + except: + if getattr(self.request, "throw_errors", False): + self.close() + raise + + tb = _cperror.format_exc() + _cherrypy.log(tb, severity=40) + if not getattr(self.request, "show_tracebacks", True): + tb = "" + s, h, b = _cperror.bare_error(tb) + self.iter_response = iter(b) + + try: + self.start_response(s, h, _sys.exc_info()) + except: + # "The application must not trap any exceptions raised by + # start_response, if it called start_response with exc_info. + # Instead, it should allow such exceptions to propagate + # back to the server or gateway." 
+ # But we still log and call close() to clean up ourselves. + _cherrypy.log(traceback=True, severity=40) + self.close() + raise + + def iredirect(self, path, query_string): + """Doctor self.environ and perform an internal redirect. + + When cherrypy.InternalRedirect is raised, this method is called. + It rewrites the WSGI environ using the new path and query_string, + and calls a new CherryPy Request object. Because the wsgi.input + stream may have already been consumed by the next application, + the redirected call will always be of HTTP method "GET"; therefore, + any params must be passed in the query_string argument, which is + formed from InternalRedirect.query_string when using that exception. + If you need something more complicated, make and raise your own + exception and write your own AppResponse subclass to trap it. ;) + + It would be a bad idea to redirect after you've already yielded + response content, although an enterprising soul could choose + to abuse this. + """ + env = self.environ + if not self.recursive: + sn = env.get('SCRIPT_NAME', '') + qs = query_string + if qs: + qs = "?" + qs + if sn + path + qs in self.redirections: + raise RuntimeError("InternalRedirector visited the " + "same URL twice: %r + %r + %r" % + (sn, path, qs)) + else: + # Add the *previous* path_info + qs to redirections. + p = env.get('PATH_INFO', '') + qs = env.get('QUERY_STRING', '') + if qs: + qs = "?" + qs + self.redirections.append(sn + p + qs) + + # Munge environment and try again. + env['REQUEST_METHOD'] = "GET" + env['PATH_INFO'] = path + env['QUERY_STRING'] = query_string + env['wsgi.input'] = _StringIO.StringIO() + env['CONTENT_LENGTH'] = "0" + + self.setapp() + + def __iter__(self): + return self + + def next(self): + try: + chunk = self.iter_response.next() + # WSGI requires all data to be of type "str". This coercion should + # not take any time at all if chunk is already of type "str". + # If it's unicode, it could be a big performance hit (x ~500). + if not isinstance(chunk, str): + chunk = chunk.encode("ISO-8859-1") + return chunk + except self.throws: + self.close() + raise + except _cherrypy.InternalRedirect, ir: + self.environ['cherrypy.previous_request'] = _cherrypy.serving.request + self.close() + self.iredirect(ir.path, ir.query_string) + except StopIteration: + raise + except: + if getattr(self.request, "throw_errors", False): + self.close() + raise + + tb = _cperror.format_exc() + _cherrypy.log(tb, severity=40) + if not getattr(self.request, "show_tracebacks", True): + tb = "" + s, h, b = _cperror.bare_error(tb) + # Empty our iterable (so future calls raise StopIteration) + self.iter_response = iter([]) + + try: + self.start_response(s, h, _sys.exc_info()) + except: + # "The application must not trap any exceptions raised by + # start_response, if it called start_response with exc_info. + # Instead, it should allow such exceptions to propagate + # back to the server or gateway." + # But we still log and call close() to clean up ourselves. + _cherrypy.log(traceback=True, severity=40) + self.close() + raise + + return "".join(b) + + def close(self): + """Close and de-reference the current request and response. 
(Core)""" + self.cpapp.release_serving() + + def get_response(self): + """Run self.request and return its response.""" + meth = self.environ['REQUEST_METHOD'] + path = _http.urljoin(self.environ.get('SCRIPT_NAME', ''), + self.environ.get('PATH_INFO', '')) + qs = self.environ.get('QUERY_STRING', '') + rproto = self.environ.get('SERVER_PROTOCOL') + headers = self.translate_headers(self.environ) + rfile = self.environ['wsgi.input'] + response = self.request.run(meth, path, qs, rproto, headers, rfile) + return response.status, response.header_list, response.body + + def get_request(self): + """Create a Request object using environ.""" + env = self.environ.get + + local = _http.Host('', int(env('SERVER_PORT', 80)), + env('SERVER_NAME', '')) + remote = _http.Host(env('REMOTE_ADDR', ''), + int(env('REMOTE_PORT', -1)), + env('REMOTE_HOST', '')) + scheme = env('wsgi.url_scheme') + sproto = env('ACTUAL_SERVER_PROTOCOL', "HTTP/1.1") + request, resp = self.cpapp.get_serving(local, remote, scheme, sproto) + + # LOGON_USER is served by IIS, and is the name of the + # user after having been mapped to a local account. + # Both IIS and Apache set REMOTE_USER, when possible. + request.login = env('LOGON_USER') or env('REMOTE_USER') or None + request.multithread = self.environ['wsgi.multithread'] + request.multiprocess = self.environ['wsgi.multiprocess'] + request.wsgi_environ = self.environ + request.prev = env('cherrypy.previous_request', None) + return request + + headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization', + 'CONTENT_LENGTH': 'Content-Length', + 'CONTENT_TYPE': 'Content-Type', + 'REMOTE_HOST': 'Remote-Host', + 'REMOTE_ADDR': 'Remote-Addr', + } + + def translate_headers(self, environ): + """Translate CGI-environ header names to HTTP header names.""" + for cgiName in environ: + # We assume all incoming header keys are uppercase already. + if cgiName in self.headerNames: + yield self.headerNames[cgiName], environ[cgiName] + elif cgiName[:5] == "HTTP_": + # Hackish attempt at recovering original header names. + translatedHeader = cgiName[5:].replace("_", "-") + yield translatedHeader, environ[cgiName] + + +class CPWSGIApp(object): + """A WSGI application object for a CherryPy Application. + + pipeline: a list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a + constructor that takes an initial, positional 'nextapp' argument, + plus optional keyword arguments, and returns a WSGI application + (that takes environ and start_response arguments). The 'name' can + be any you choose, and will correspond to keys in self.config. + + head: rather than nest all apps in the pipeline on each call, it's only + done the first time, and the result is memoized into self.head. Set + this to None again if you change self.pipeline after calling self. + + config: a dict whose keys match names listed in the pipeline. Each + value is a further dict which will be passed to the corresponding + named WSGI callable (from the pipeline) as keyword arguments. + """ + + pipeline = [] + head = None + config = {} + + response_class = AppResponse + + def __init__(self, cpapp, pipeline=None): + self.cpapp = cpapp + self.pipeline = self.pipeline[:] + if pipeline: + self.pipeline.extend(pipeline) + self.config = self.config.copy() + + def tail(self, environ, start_response): + """WSGI application callable for the actual CherryPy application. + + You probably shouldn't call this; call self.__call__ instead, + so that any WSGI middleware in self.pipeline can run first. 
+ """ + return self.response_class(environ, start_response, self.cpapp) + + def __call__(self, environ, start_response): + head = self.head + if head is None: + # Create and nest the WSGI apps in our pipeline (in reverse order). + # Then memoize the result in self.head. + head = self.tail + for name, callable in self.pipeline[::-1]: + conf = self.config.get(name, {}) + head = callable(head, **conf) + self.head = head + return head(environ, start_response) + + def namespace_handler(self, k, v): + """Config handler for the 'wsgi' namespace.""" + if k == "pipeline": + # Note this allows multiple 'wsgi.pipeline' config entries + # (but each entry will be processed in a 'random' order). + # It should also allow developers to set default middleware + # in code (passed to self.__init__) that deployers can add to + # (but not remove) via config. + self.pipeline.extend(v) + elif k == "response_class": + self.response_class = v + else: + name, arg = k.split(".", 1) + bucket = self.config.setdefault(name, {}) + bucket[arg] = v + diff --git a/src/cherrypy/_cpwsgi_server.py b/src/cherrypy/_cpwsgi_server.py new file mode 100644 index 0000000000..5953cf2ab0 --- /dev/null +++ b/src/cherrypy/_cpwsgi_server.py @@ -0,0 +1,54 @@ +"""WSGI server interface (see PEP 333). This adds some CP-specific bits to +the framework-agnostic wsgiserver package. +""" + +import cherrypy +from cherrypy import wsgiserver + + +class CPHTTPRequest(wsgiserver.HTTPRequest): + + def __init__(self, sendall, environ, wsgi_app): + s = cherrypy.server + self.max_request_header_size = s.max_request_header_size or 0 + self.max_request_body_size = s.max_request_body_size or 0 + wsgiserver.HTTPRequest.__init__(self, sendall, environ, wsgi_app) + + +class CPHTTPConnection(wsgiserver.HTTPConnection): + + RequestHandlerClass = CPHTTPRequest + + +class CPWSGIServer(wsgiserver.CherryPyWSGIServer): + """Wrapper for wsgiserver.CherryPyWSGIServer. + + wsgiserver has been designed to not reference CherryPy in any way, + so that it can be used in other frameworks and applications. Therefore, + we wrap it here, so we can set our own mount points from cherrypy.tree + and apply some attributes from config -> cherrypy.server -> wsgiserver. + """ + + ConnectionClass = CPHTTPConnection + + def __init__(self): + server = cherrypy.server + sockFile = server.socket_file + if sockFile: + bind_addr = sockFile + else: + bind_addr = (server.socket_host, server.socket_port) + + s = wsgiserver.CherryPyWSGIServer + s.__init__(self, bind_addr, cherrypy.tree, + server.thread_pool, + server.socket_host, + request_queue_size = server.socket_queue_size, + timeout = server.socket_timeout, + shutdown_timeout = server.shutdown_timeout, + ) + self.protocol = server.protocol_version + self.nodelay = server.nodelay + self.ssl_certificate = server.ssl_certificate + self.ssl_private_key = server.ssl_private_key + diff --git a/src/cherrypy/cherryd b/src/cherrypy/cherryd new file mode 100644 index 0000000000..3d5cbdefce --- /dev/null +++ b/src/cherrypy/cherryd @@ -0,0 +1,82 @@ +#! 
/usr/bin/env python +"""The CherryPy daemon.""" + +import sys + +import cherrypy +from cherrypy.process import plugins, servers + + +def start(configfiles=None, daemonize=False, environment=None, + fastcgi=False, pidfile=None, imports=None): + """Subscribe all engine plugins and start the engine.""" + for i in imports: + exec "import %s" % i + + for c in configfiles or []: + cherrypy.config.update(c) + + engine = cherrypy.engine + + if environment is not None: + cherrypy.config.update({'environment': environment}) + + # Only daemonize if asked to. + if daemonize: + # Don't print anything to stdout/sterr. + cherrypy.config.update({'log.screen': False}) + plugins.Daemonizer(engine).subscribe() + + if pidfile: + plugins.PIDFile(engine, pidfile).subscribe() + + if hasattr(engine, "signal_handler"): + engine.signal_handler.subscribe() + if hasattr(engine, "console_control_handler"): + engine.console_control_handler.subscribe() + + if fastcgi: + # turn off autoreload when using fastcgi + cherrypy.config.update({'autoreload.on': False}) + + cherrypy.server.unsubscribe() + + fastcgi_port = cherrypy.config.get('server.socket_port', 4000) + fastcgi_bindaddr = cherrypy.config.get('server.socket_host', '0.0.0.0') + bindAddress = (fastcgi_bindaddr, fastcgi_port) + f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=bindAddress) + s = servers.ServerAdapter(engine, httpserver=f, bind_addr=bindAddress) + s.subscribe() + + # Always start the engine; this will start all other services + try: + engine.start() + except: + # Assume the error has been logged already via bus.log. + sys.exit(1) + else: + engine.block() + + +if __name__ == '__main__': + from optparse import OptionParser + + p = OptionParser() + p.add_option('-c', '--config', action="append", dest='config', + help="specify config file(s)") + p.add_option('-d', action="store_true", dest='daemonize', + help="run the server as a daemon") + p.add_option('-e', '--environment', dest='environment', default=None, + help="apply the given config environment") + p.add_option('-f', action="store_true", dest='fastcgi', + help="start a fastcgi server instead of the default HTTP server") + p.add_option('-i', '--import', action="append", dest='imports', + help="specify modules to import") + p.add_option('-p', '--pidfile', dest='pidfile', default=None, + help="store the process id in the given file") + options, args = p.parse_args() + + start(options.config, options.daemonize, + options.environment, options.fastcgi, options.pidfile, + options.imports) + diff --git a/src/cherrypy/favicon.ico b/src/cherrypy/favicon.ico new file mode 100644 index 0000000000..f0d7e61bad Binary files /dev/null and b/src/cherrypy/favicon.ico differ diff --git a/src/cherrypy/lib/__init__.py b/src/cherrypy/lib/__init__.py new file mode 100644 index 0000000000..4e225cb12e --- /dev/null +++ b/src/cherrypy/lib/__init__.py @@ -0,0 +1,135 @@ +"""CherryPy Library""" + +import sys as _sys + + +def modules(modulePath): + """Load a module and retrieve a reference to that module.""" + try: + mod = _sys.modules[modulePath] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. 
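+        # Passing a non-empty fromlist makes __import__ return the leaf module
+        # itself (e.g. "a.b.c") rather than the top-level package ("a").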
+ mod = __import__(modulePath, globals(), locals(), ['']) + return mod + +def attributes(full_attribute_name): + """Load a module and retrieve an attribute of that module.""" + + # Parse out the path, module, and attribute + last_dot = full_attribute_name.rfind(u".") + attr_name = full_attribute_name[last_dot + 1:] + mod_path = full_attribute_name[:last_dot] + + mod = modules(mod_path) + # Let an AttributeError propagate outward. + try: + attr = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + # Return a reference to the attribute. + return attr + + +# public domain "unrepr" implementation, found on the web and then improved. + +class _Builder: + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise TypeError("unrepr does not recognize %s" % + repr(o.__class__.__name__)) + return m(o) + + def build_Subscript(self, o): + expr, flags, subs = o.getChildren() + expr = self.build(expr) + subs = self.build(subs) + return expr[subs] + + def build_CallFunc(self, o): + children = map(self.build, o.getChildren()) + callee = children.pop(0) + kwargs = children.pop() or {} + starargs = children.pop() or () + args = tuple(children) + tuple(starargs) + return callee(*args, **kwargs) + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + if o.name == 'None': + return None + if o.name == 'True': + return True + if o.name == 'False': + return False + + # See if the Name is a package or module. If it is, import it. + try: + return modules(o.name) + except ImportError: + pass + + # See if the Name is in __builtin__. + try: + import __builtin__ + return getattr(__builtin__, o.name) + except AttributeError: + pass + + raise TypeError("unrepr could not resolve the name %s" % repr(o.name)) + + def build_Add(self, o): + left, right = map(self.build, o.getChildren()) + return left + right + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_NoneType(self, o): + return None + + def build_UnarySub(self, o): + return -self.build(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build(o.getChildren()[0]) + + +def unrepr(s): + """Return a Python object compiled from a string.""" + if not s: + return s + + try: + import compiler + except ImportError: + # Fallback to eval when compiler package is not available, + # e.g. IronPython 1.0. 
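+ # Either branch turns a config string into the object it spells out,
+ # e.g. unrepr("{'a': [1, 2]}") == {'a': [1, 2]}. The eval() fallback
+ # accepts any expression, whereas the compiler-based walk below only
+ # handles the node types implemented by _Builder (constants, lists,
+ # dicts, tuples, names, attribute access, simple calls and a few others).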
+ return eval(s) + + p = compiler.parse("__tempvalue__ = " + s) + obj = p.getChildren()[1].getChildren()[0].getChildren()[1] + + return _Builder().build(obj) + diff --git a/src/cherrypy/lib/auth.py b/src/cherrypy/lib/auth.py new file mode 100644 index 0000000000..11a96735e6 --- /dev/null +++ b/src/cherrypy/lib/auth.py @@ -0,0 +1,75 @@ +import cherrypy +from cherrypy.lib import httpauth + + +def check_auth(users, encrypt=None, realm=None): + """If an authorization header contains credentials, return True, else False.""" + if 'authorization' in cherrypy.request.headers: + # make sure the provided credentials are correctly set + ah = httpauth.parseAuthorization(cherrypy.request.headers['authorization']) + if ah is None: + raise cherrypy.HTTPError(400, 'Bad Request') + + if not encrypt: + encrypt = httpauth.DIGEST_AUTH_ENCODERS[httpauth.MD5] + + if callable(users): + try: + # backward compatibility + users = users() # expect it to return a dictionary + + if not isinstance(users, dict): + raise ValueError, "Authentication users must be a dictionary" + + # fetch the user password + password = users.get(ah["username"], None) + except TypeError: + # returns a password (encrypted or clear text) + password = users(ah["username"]) + else: + if not isinstance(users, dict): + raise ValueError, "Authentication users must be a dictionary" + + # fetch the user password + password = users.get(ah["username"], None) + + # validate the authorization by re-computing it here + # and compare it with what the user-agent provided + if httpauth.checkResponse(ah, password, method=cherrypy.request.method, + encrypt=encrypt, realm=realm): + cherrypy.request.login = ah["username"] + return True + + cherrypy.request.login = False + return False + +def basic_auth(realm, users, encrypt=None): + """If auth fails, raise 401 with a basic authentication header. + + realm: a string containing the authentication realm. + users: a dict of the form: {username: password} or a callable returning a dict. + encrypt: callable used to encrypt the password returned from the user-agent. + if None it defaults to a md5 encryption. + """ + if check_auth(users, encrypt): + return + + # inform the user-agent this path is protected + cherrypy.response.headers['www-authenticate'] = httpauth.basicAuth(realm) + + raise cherrypy.HTTPError(401, "You are not authorized to access that resource") + +def digest_auth(realm, users): + """If auth fails, raise 401 with a digest authentication header. + + realm: a string containing the authentication realm. + users: a dict of the form: {username: password} or a callable returning a dict. 
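+
+ A minimal usage sketch, with purely illustrative realm and credentials:
+ digest_auth('calibre library', {'admin': 'secret'}) either returns after
+ setting cherrypy.request.login, or raises HTTPError(401) together with a
+ WWW-Authenticate challenge built by httpauth.digestAuth().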
+ """ + if check_auth(users, realm=realm): + return + + # inform the user-agent this path is protected + cherrypy.response.headers['www-authenticate'] = httpauth.digestAuth(realm) + + raise cherrypy.HTTPError(401, "You are not authorized to access that resource") + diff --git a/src/cherrypy/lib/caching.py b/src/cherrypy/lib/caching.py new file mode 100644 index 0000000000..d43a593279 --- /dev/null +++ b/src/cherrypy/lib/caching.py @@ -0,0 +1,241 @@ +import datetime +import threading +import time + +import cherrypy +from cherrypy.lib import cptools, http + + +class MemoryCache: + + maxobjects = 1000 + maxobj_size = 100000 + maxsize = 10000000 + delay = 600 + + def __init__(self): + self.clear() + t = threading.Thread(target=self.expire_cache, name='expire_cache') + self.expiration_thread = t + t.setDaemon(True) + t.start() + + def clear(self): + """Reset the cache to its initial, empty state.""" + self.cache = {} + self.expirations = {} + self.tot_puts = 0 + self.tot_gets = 0 + self.tot_hist = 0 + self.tot_expires = 0 + self.tot_non_modified = 0 + self.cursize = 0 + + def key(self): + return cherrypy.url(qs=cherrypy.request.query_string) + + def expire_cache(self): + # expire_cache runs in a separate thread which the servers are + # not aware of. It's possible that "time" will be set to None + # arbitrarily, so we check "while time" to avoid exceptions. + # See tickets #99 and #180 for more information. + while time: + now = time.time() + for expiration_time, objects in self.expirations.items(): + if expiration_time <= now: + for obj_size, obj_key in objects: + try: + del self.cache[obj_key] + self.tot_expires += 1 + self.cursize -= obj_size + except KeyError: + # the key may have been deleted elsewhere + pass + del self.expirations[expiration_time] + time.sleep(0.1) + + def get(self): + """Return the object if in the cache, else None.""" + self.tot_gets += 1 + cache_item = self.cache.get(self.key(), None) + if cache_item: + self.tot_hist += 1 + return cache_item + else: + return None + + def put(self, obj): + if len(self.cache) < self.maxobjects: + # Size check no longer includes header length + obj_size = len(obj[2]) + total_size = self.cursize + obj_size + + # checks if there's space for the object + if (obj_size < self.maxobj_size and total_size < self.maxsize): + # add to the expirations list and cache + expiration_time = cherrypy.response.time + self.delay + obj_key = self.key() + bucket = self.expirations.setdefault(expiration_time, []) + bucket.append((obj_size, obj_key)) + self.cache[obj_key] = obj + self.tot_puts += 1 + self.cursize = total_size + + def delete(self): + self.cache.pop(self.key(), None) + + +def get(invalid_methods=("POST", "PUT", "DELETE"), **kwargs): + """Try to obtain cached output. If fresh enough, raise HTTPError(304). + + If POST, PUT, or DELETE: + * invalidates (deletes) any cached response for this resource + * sets request.cached = False + * sets request.cacheable = False + + else if a cached copy exists: + * sets request.cached = True + * sets request.cacheable = False + * sets response.headers to the cached values + * checks the cached Last-Modified response header against the + current If-(Un)Modified-Since request headers; raises 304 + if necessary. + * sets response.status and response.body to the cached values + * returns True + + otherwise: + * sets request.cached = False + * sets request.cacheable = True + * returns False + """ + request = cherrypy.request + + # POST, PUT, DELETE should invalidate (delete) the cached copy. 
+ # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10. + if request.method in invalid_methods: + cherrypy._cache.delete() + request.cached = False + request.cacheable = False + return False + + cache_data = cherrypy._cache.get() + request.cached = c = bool(cache_data) + request.cacheable = not c + if c: + response = cherrypy.response + s, h, b, create_time, original_req_headers = cache_data + + # Check 'Vary' selecting headers. If any headers mentioned in "Vary" + # differ between the cached and current request, bail out and + # let the rest of CP handle the request. This should properly + # mimic the behavior of isolated caches as RFC 2616 assumes: + # "If the selecting request header fields for the cached entry + # do not match the selecting request header fields of the new + # request, then the cache MUST NOT use a cached entry to satisfy + # the request unless it first relays the new request to the origin + # server in a conditional request and the server responds with + # 304 (Not Modified), including an entity tag or Content-Location + # that indicates the entity to be used. + # TODO: can we store multiple variants based on Vary'd headers? + for header_element in h.elements('Vary'): + key = header_element.value + if original_req_headers[key] != request.headers.get(key, 'missing'): + request.cached = False + request.cacheable = True + return False + + # Copy the response headers. See http://www.cherrypy.org/ticket/721. + response.headers = rh = http.HeaderMap() + for k in h: + dict.__setitem__(rh, k, dict.__getitem__(h, k)) + + # Add the required Age header + response.headers["Age"] = str(int(response.time - create_time)) + + try: + # Note that validate_since depends on a Last-Modified header; + # this was put into the cached copy, and should have been + # resurrected just above (response.headers = cache_data[1]). + cptools.validate_since() + except cherrypy.HTTPRedirect, x: + if x.status == 304: + cherrypy._cache.tot_non_modified += 1 + raise + + # serve it & get out from the request + response.status = s + response.body = b + return c + + +def tee_output(): + def tee(body): + """Tee response.body into a list.""" + output = [] + for chunk in body: + output.append(chunk) + yield chunk + + # Might as well do this here; why cache if the body isn't consumed? + if response.headers.get('Pragma', None) != 'no-cache': + # save the cache data + body = ''.join(output) + vary = [he.value for he in + cherrypy.response.headers.elements('Vary')] + if vary: + sel_headers = dict([(k, v) for k, v + in cherrypy.request.headers.iteritems() + if k in vary]) + else: + sel_headers = {} + cherrypy._cache.put((response.status, response.headers or {}, + body, response.time, sel_headers)) + + response = cherrypy.response + response.body = tee(response.body) + + +def expires(secs=0, force=False): + """Tool for influencing cache mechanisms using the 'Expires' header. + + 'secs' must be either an int or a datetime.timedelta, and indicates the + number of seconds between response.time and when the response should + expire. The 'Expires' header will be set to (response.time + secs). + + If 'secs' is zero, the 'Expires' header is set one year in the past, and + the following "cache prevention" headers are also set: + 'Pragma': 'no-cache' + 'Cache-Control': 'no-cache, must-revalidate' + + If 'force' is False (the default), the following headers are checked: + 'Etag', 'Last-Modified', 'Age', 'Expires'. If any are already present, + none of the above response headers are set. 
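+
+ Illustrative examples: expires(secs=3600) stamps an Expires date one hour
+ after response.time when none of the indicator headers above are already
+ present, while expires(secs=0, force=True) marks the response uncacheable
+ regardless of them.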
+ """ + + response = cherrypy.response + headers = response.headers + + cacheable = False + if not force: + # some header names that indicate that the response can be cached + for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'): + if indicator in headers: + cacheable = True + break + + if not cacheable: + if isinstance(secs, datetime.timedelta): + secs = (86400 * secs.days) + secs.seconds + + if secs == 0: + if force or "Pragma" not in headers: + headers["Pragma"] = "no-cache" + if cherrypy.request.protocol >= (1, 1): + if force or "Cache-Control" not in headers: + headers["Cache-Control"] = "no-cache, must-revalidate" + # Set an explicit Expires date in the past. + expiry = http.HTTPDate(1169942400.0) + else: + expiry = http.HTTPDate(response.time + secs) + if force or "Expires" not in headers: + headers["Expires"] = expiry diff --git a/src/cherrypy/lib/covercp.py b/src/cherrypy/lib/covercp.py new file mode 100644 index 0000000000..ecc8cf47da --- /dev/null +++ b/src/cherrypy/lib/covercp.py @@ -0,0 +1,361 @@ +"""Code-coverage tools for CherryPy. + +To use this module, or the coverage tools in the test suite, +you need to download 'coverage.py', either Gareth Rees' original +implementation: +http://www.garethrees.org/2001/12/04/python-coverage/ + +or Ned Batchelder's enhanced version: +http://www.nedbatchelder.com/code/modules/coverage.html + +To turn on coverage tracing, use the following code: + + cherrypy.engine.subscribe('start', covercp.start) + cherrypy.engine.subscribe('start_thread', covercp.start) + +Run your code, then use the covercp.serve() function to browse the +results in a web browser. If you run this module from the command line, +it will call serve() for you. +""" + +import re +import sys +import cgi +import urllib +import os, os.path +localFile = os.path.join(os.path.dirname(__file__), "coverage.cache") + +try: + import cStringIO as StringIO +except ImportError: + import StringIO + +try: + from coverage import the_coverage as coverage + def start(threadid=None): + coverage.start() +except ImportError: + # Setting coverage to None will raise errors + # that need to be trapped downstream. + coverage = None + + import warnings + warnings.warn("No code coverage will be performed; coverage.py could not be imported.") + + def start(threadid=None): + pass +start.priority = 20 + +# Guess initial depth to hide FIXME this doesn't work for non-cherrypy stuff +import cherrypy +initial_base = os.path.dirname(cherrypy.__file__) + +TEMPLATE_MENU = """ + + CherryPy Coverage Menu + + + +

CherryPy Coverage

""" + +TEMPLATE_FORM = """ +
+
+ + Show percentages
+ Hide files over %%
+ Exclude files matching
+ +
+ + +
+
""" + +TEMPLATE_FRAMESET = """ +CherryPy coverage data + + + + + +""" % initial_base.lower() + +TEMPLATE_COVERAGE = """ + + Coverage for %(name)s + + + +

%(name)s

+

%(fullpath)s

+

Coverage: %(pc)s%%

""" + +TEMPLATE_LOC_COVERED = """ + %s  + %s +\n""" +TEMPLATE_LOC_NOT_COVERED = """ + %s  + %s +\n""" +TEMPLATE_LOC_EXCLUDED = """ + %s  + %s +\n""" + +TEMPLATE_ITEM = "%s%s%s\n" + +def _percent(statements, missing): + s = len(statements) + e = s - len(missing) + if s > 0: + return int(round(100.0 * e / s)) + return 0 + +def _show_branch(root, base, path, pct=0, showpct=False, exclude=""): + + # Show the directory name and any of our children + dirs = [k for k, v in root.iteritems() if v] + dirs.sort() + for name in dirs: + newpath = os.path.join(path, name) + + if newpath.lower().startswith(base): + relpath = newpath[len(base):] + yield "| " * relpath.count(os.sep) + yield "%s\n" % \ + (newpath, urllib.quote_plus(exclude), name) + + for chunk in _show_branch(root[name], base, newpath, pct, showpct, exclude): + yield chunk + + # Now list the files + if path.lower().startswith(base): + relpath = path[len(base):] + files = [k for k, v in root.iteritems() if not v] + files.sort() + for name in files: + newpath = os.path.join(path, name) + + pc_str = "" + if showpct: + try: + _, statements, _, missing, _ = coverage.analysis2(newpath) + except: + # Yes, we really want to pass on all errors. + pass + else: + pc = _percent(statements, missing) + pc_str = ("%3d%% " % pc).replace(' ',' ') + if pc < float(pct) or pc == -1: + pc_str = "%s" % pc_str + else: + pc_str = "%s" % pc_str + + yield TEMPLATE_ITEM % ("| " * (relpath.count(os.sep) + 1), + pc_str, newpath, name) + +def _skip_file(path, exclude): + if exclude: + return bool(re.search(exclude, path)) + +def _graft(path, tree): + d = tree + + p = path + atoms = [] + while True: + p, tail = os.path.split(p) + if not tail: + break + atoms.append(tail) + atoms.append(p) + if p != "/": + atoms.append("/") + + atoms.reverse() + for node in atoms: + if node: + d = d.setdefault(node, {}) + +def get_tree(base, exclude): + """Return covered module names as a nested dict.""" + tree = {} + coverage.get_ready() + runs = coverage.cexecuted.keys() + if runs: + for path in runs: + if not _skip_file(path, exclude) and not os.path.isdir(path): + _graft(path, tree) + return tree + +class CoverStats(object): + + def index(self): + return TEMPLATE_FRAMESET + index.exposed = True + + def menu(self, base="/", pct="50", showpct="", + exclude=r'python\d\.\d|test|tut\d|tutorial'): + + # The coverage module uses all-lower-case names. + base = base.lower().rstrip(os.sep) + + yield TEMPLATE_MENU + yield TEMPLATE_FORM % locals() + + # Start by showing links for parent paths + yield "
" + path = "" + atoms = base.split(os.sep) + atoms.pop() + for atom in atoms: + path += atom + os.sep + yield ("%s %s" + % (path, urllib.quote_plus(exclude), atom, os.sep)) + yield "
" + + yield "
" + + # Then display the tree + tree = get_tree(base, exclude) + if not tree: + yield "

No modules covered.

" + else: + for chunk in _show_branch(tree, base, "/", pct, + showpct=='checked', exclude): + yield chunk + + yield "
" + yield "" + menu.exposed = True + + def annotated_file(self, filename, statements, excluded, missing): + source = open(filename, 'r') + buffer = [] + for lineno, line in enumerate(source.readlines()): + lineno += 1 + line = line.strip("\n\r") + empty_the_buffer = True + if lineno in excluded: + template = TEMPLATE_LOC_EXCLUDED + elif lineno in missing: + template = TEMPLATE_LOC_NOT_COVERED + elif lineno in statements: + template = TEMPLATE_LOC_COVERED + else: + empty_the_buffer = False + buffer.append((lineno, line)) + if empty_the_buffer: + for lno, pastline in buffer: + yield template % (lno, cgi.escape(pastline)) + buffer = [] + yield template % (lineno, cgi.escape(line)) + + def report(self, name): + coverage.get_ready() + filename, statements, excluded, missing, _ = coverage.analysis2(name) + pc = _percent(statements, missing) + yield TEMPLATE_COVERAGE % dict(name=os.path.basename(name), + fullpath=name, + pc=pc) + yield '\n' + for line in self.annotated_file(filename, statements, excluded, + missing): + yield line + yield '
' + yield '' + yield '' + report.exposed = True + + +def serve(path=localFile, port=8080): + if coverage is None: + raise ImportError("The coverage module could not be imported.") + coverage.cache_default = path + + import cherrypy + cherrypy.config.update({'server.socket_port': port, + 'server.thread_pool': 10, + 'environment': "production", + }) + cherrypy.quickstart(CoverStats()) + +if __name__ == "__main__": + serve(*tuple(sys.argv[1:])) + diff --git a/src/cherrypy/lib/cptools.py b/src/cherrypy/lib/cptools.py new file mode 100644 index 0000000000..966c328945 --- /dev/null +++ b/src/cherrypy/lib/cptools.py @@ -0,0 +1,428 @@ +"""Functions for builtin CherryPy tools.""" + +import logging +import md5 +import re + +import cherrypy +from cherrypy.lib import http as _http + + +# Conditional HTTP request support # + +def validate_etags(autotags=False): + """Validate the current ETag against If-Match, If-None-Match headers. + + If autotags is True, an ETag response-header value will be provided + from an MD5 hash of the response body (unless some other code has + already provided an ETag header). If False (the default), the ETag + will not be automatic. + + WARNING: the autotags feature is not designed for URL's which allow + methods other than GET. For example, if a POST to the same URL returns + no content, the automatic ETag will be incorrect, breaking a fundamental + use for entity tags in a possibly destructive fashion. Likewise, if you + raise 304 Not Modified, the response body will be empty, the ETag hash + will be incorrect, and your application will break. + See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24 + """ + response = cherrypy.response + + # Guard against being run twice. + if hasattr(response, "ETag"): + return + + status, reason, msg = _http.valid_status(response.status) + + etag = response.headers.get('ETag') + + # Automatic ETag generation. See warning in docstring. + if (not etag) and autotags: + if status == 200: + etag = response.collapse_body() + etag = '"%s"' % md5.new(etag).hexdigest() + response.headers['ETag'] = etag + + response.ETag = etag + + # "If the request would, without the If-Match header field, result in + # anything other than a 2xx or 412 status, then the If-Match header + # MUST be ignored." + if status >= 200 and status <= 299: + request = cherrypy.request + + conditions = request.headers.elements('If-Match') or [] + conditions = [str(x) for x in conditions] + if conditions and not (conditions == ["*"] or etag in conditions): + raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did " + "not match %r" % (etag, conditions)) + + conditions = request.headers.elements('If-None-Match') or [] + conditions = [str(x) for x in conditions] + if conditions == ["*"] or etag in conditions: + if request.method in ("GET", "HEAD"): + raise cherrypy.HTTPRedirect([], 304) + else: + raise cherrypy.HTTPError(412, "If-None-Match failed: ETag %r " + "matched %r" % (etag, conditions)) + +def validate_since(): + """Validate the current Last-Modified against If-Modified-Since headers. + + If no code has set the Last-Modified response header, then no validation + will be performed. 
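+
+ In other words, a handler that sets response.headers['Last-Modified'] can
+ rely on this tool to answer a matching If-Modified-Since GET or HEAD request
+ with 304 Not Modified (raised here as HTTPRedirect([], 304)) instead of
+ resending the body.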
+ """ + response = cherrypy.response + lastmod = response.headers.get('Last-Modified') + if lastmod: + status, reason, msg = _http.valid_status(response.status) + + request = cherrypy.request + + since = request.headers.get('If-Unmodified-Since') + if since and since != lastmod: + if (status >= 200 and status <= 299) or status == 412: + raise cherrypy.HTTPError(412) + + since = request.headers.get('If-Modified-Since') + if since and since == lastmod: + if (status >= 200 and status <= 299) or status == 304: + if request.method in ("GET", "HEAD"): + raise cherrypy.HTTPRedirect([], 304) + else: + raise cherrypy.HTTPError(412) + + +# Tool code # + +def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', + scheme='X-Forwarded-Proto'): + """Change the base URL (scheme://host[:port][/path]). + + For running a CP server behind Apache, lighttpd, or other HTTP server. + + If you want the new request.base to include path info (not just the host), + you must explicitly set base to the full base path, and ALSO set 'local' + to '', so that the X-Forwarded-Host request header (which never includes + path info) does not override it. + + cherrypy.request.remote.ip (the IP address of the client) will be + rewritten if the header specified by the 'remote' arg is valid. + By default, 'remote' is set to 'X-Forwarded-For'. If you do not + want to rewrite remote.ip, set the 'remote' arg to an empty string. + """ + + request = cherrypy.request + + if scheme: + s = request.headers.get(scheme, None) + if s == 'on' and 'ssl' in scheme.lower(): + # This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header + scheme = 'https' + else: + # This is for lighttpd/pound/Mongrel's 'X-Forwarded-Proto: https' + scheme = s + if not scheme: + scheme = request.base[:request.base.find("://")] + + if local: + base = request.headers.get(local, base) + if not base: + port = cherrypy.request.local.port + if port == 80: + base = '127.0.0.1' + else: + base = '127.0.0.1:%s' % port + + if base.find("://") == -1: + # add http:// or https:// if needed + base = scheme + "://" + base + + request.base = base + + if remote: + xff = request.headers.get(remote) + if xff: + if remote == 'X-Forwarded-For': + # See http://bob.pythonmac.org/archives/2005/09/23/apache-x-forwarded-for-caveat/ + xff = xff.split(',')[-1].strip() + request.remote.ip = xff + + +def ignore_headers(headers=('Range',)): + """Delete request headers whose field names are included in 'headers'. + + This is a useful tool for working behind certain HTTP servers; + for example, Apache duplicates the work that CP does for 'Range' + headers, and will doubly-truncate the response. + """ + request = cherrypy.request + for name in headers: + if name in request.headers: + del request.headers[name] + + +def response_headers(headers=None): + """Set headers on the response.""" + for name, value in (headers or []): + cherrypy.response.headers[name] = value +response_headers.failsafe = True + + +def referer(pattern, accept=True, accept_missing=False, error=403, + message='Forbidden Referer header.'): + """Raise HTTPError if Referer header does/does not match the given pattern. + + pattern: a regular expression pattern to test against the Referer. + accept: if True, the Referer must match the pattern; if False, + the Referer must NOT match the pattern. + accept_missing: if True, permit requests with no Referer header. + error: the HTTP error code to return to the client on failure. + message: a string to include in the response body on failure. 
+ """ + try: + match = bool(re.match(pattern, cherrypy.request.headers['Referer'])) + if accept == match: + return + except KeyError: + if accept_missing: + return + + raise cherrypy.HTTPError(error, message) + + +class SessionAuth(object): + """Assert that the user is logged in.""" + + session_key = "username" + + def check_username_and_password(self, username, password): + pass + + def anonymous(self): + """Provide a temporary user name for anonymous users.""" + pass + + def on_login(self, username): + pass + + def on_logout(self, username): + pass + + def on_check(self, username): + pass + + def login_screen(self, from_page='..', username='', error_msg=''): + return """ +Message: %(error_msg)s +
+ Login:
+ Password:
+
+ +
+""" % {'from_page': from_page, 'username': username, + 'error_msg': error_msg} + + def do_login(self, username, password, from_page='..'): + """Login. May raise redirect, or return True if request handled.""" + error_msg = self.check_username_and_password(username, password) + if error_msg: + body = self.login_screen(from_page, username, error_msg) + cherrypy.response.body = body + if cherrypy.response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del cherrypy.response.headers["Content-Length"] + return True + else: + cherrypy.session[self.session_key] = cherrypy.request.login = username + self.on_login(username) + raise cherrypy.HTTPRedirect(from_page or "/") + + def do_logout(self, from_page='..'): + """Logout. May raise redirect, or return True if request handled.""" + sess = cherrypy.session + username = sess.get(self.session_key) + sess[self.session_key] = None + if username: + cherrypy.request.login = None + self.on_logout(username) + raise cherrypy.HTTPRedirect(from_page) + + def do_check(self): + """Assert username. May raise redirect, or return True if request handled.""" + sess = cherrypy.session + request = cherrypy.request + + username = sess.get(self.session_key) + if not username: + sess[self.session_key] = username = self.anonymous() + if not username: + cherrypy.response.body = self.login_screen(cherrypy.url(qs=request.query_string)) + if cherrypy.response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del cherrypy.response.headers["Content-Length"] + return True + cherrypy.request.login = username + self.on_check(username) + + def run(self): + request = cherrypy.request + path = request.path_info + if path.endswith('login_screen'): + return self.login_screen(**request.params) + elif path.endswith('do_login'): + return self.do_login(**request.params) + elif path.endswith('do_logout'): + return self.do_logout(**request.params) + else: + return self.do_check() + + +def session_auth(**kwargs): + sa = SessionAuth() + for k, v in kwargs.iteritems(): + setattr(sa, k, v) + return sa.run() +session_auth.__doc__ = """Session authentication hook. + +Any attribute of the SessionAuth class may be overridden via a keyword arg +to this function: + +""" + "\n".join(["%s: %s" % (k, type(getattr(SessionAuth, k)).__name__) + for k in dir(SessionAuth) if not k.startswith("__")]) + + +def log_traceback(severity=logging.DEBUG): + """Write the last error's traceback to the cherrypy error log.""" + cherrypy.log("", "HTTP", severity=severity, traceback=True) + +def log_request_headers(): + """Write request headers to the cherrypy error log.""" + h = [" %s: %s" % (k, v) for k, v in cherrypy.request.header_list] + cherrypy.log('\nRequest Headers:\n' + '\n'.join(h), "HTTP") + +def log_hooks(): + """Write request.hooks to the cherrypy error log.""" + msg = [] + # Sort by the standard points if possible. 
+ from cherrypy import _cprequest + points = _cprequest.hookpoints + for k in cherrypy.request.hooks.keys(): + if k not in points: + points.append(k) + + for k in points: + msg.append(" %s:" % k) + v = cherrypy.request.hooks.get(k, []) + v.sort() + for h in v: + msg.append(" %r" % h) + cherrypy.log('\nRequest Hooks for ' + cherrypy.url() + + ':\n' + '\n'.join(msg), "HTTP") + +def redirect(url='', internal=True): + """Raise InternalRedirect or HTTPRedirect to the given url.""" + if internal: + raise cherrypy.InternalRedirect(url) + else: + raise cherrypy.HTTPRedirect(url) + +def trailing_slash(missing=True, extra=False): + """Redirect if path_info has (missing|extra) trailing slash.""" + request = cherrypy.request + pi = request.path_info + + if request.is_index is True: + if missing: + if not pi.endswith('/'): + new_url = cherrypy.url(pi + '/', request.query_string) + raise cherrypy.HTTPRedirect(new_url) + elif request.is_index is False: + if extra: + # If pi == '/', don't redirect to ''! + if pi.endswith('/') and pi != '/': + new_url = cherrypy.url(pi[:-1], request.query_string) + raise cherrypy.HTTPRedirect(new_url) + +def flatten(): + """Wrap response.body in a generator that recursively iterates over body. + + This allows cherrypy.response.body to consist of 'nested generators'; + that is, a set of generators that yield generators. + """ + import types + def flattener(input): + for x in input: + if not isinstance(x, types.GeneratorType): + yield x + else: + for y in flattener(x): + yield y + response = cherrypy.response + response.body = flattener(response.body) + + +def accept(media=None): + """Return the client's preferred media-type (from the given Content-Types). + + If 'media' is None (the default), no test will be performed. + + If 'media' is provided, it should be the Content-Type value (as a string) + or values (as a list or tuple of strings) which the current request + can emit. The client's acceptable media ranges (as declared in the + Accept request header) will be matched in order to these Content-Type + values; the first such string is returned. That is, the return value + will always be one of the strings provided in the 'media' arg (or None + if 'media' is None). + + If no match is found, then HTTPError 406 (Not Acceptable) is raised. + Note that most web browsers send */* as a (low-quality) acceptable + media range, which should match any Content-Type. In addition, "...if + no Accept header field is present, then it is assumed that the client + accepts all media types." + + Matching types are checked in order of client preference first, + and then in the order of the given 'media' values. + + Note that this function does not honor accept-params (other than "q"). + """ + if not media: + return + if isinstance(media, basestring): + media = [media] + + # Parse the Accept request header, and try to match one + # of the requested media-ranges (in order of preference). + ranges = cherrypy.request.headers.elements('Accept') + if not ranges: + # Any media type is acceptable. + return media[0] + else: + # Note that 'ranges' is sorted in order of preference + for element in ranges: + if element.qvalue > 0: + if element.value == "*/*": + # Matches any type or subtype + return media[0] + elif element.value.endswith("/*"): + # Matches any subtype + mtype = element.value[:-1] # Keep the slash + for m in media: + if m.startswith(mtype): + return m + else: + # Matches exact value + if element.value in media: + return element.value + + # No suitable media-range found. 
+ ah = cherrypy.request.headers.get('Accept') + if ah is None: + msg = "Your client did not send an Accept header." + else: + msg = "Your client sent this Accept header: %s." % ah + msg += (" But this resource only emits these media types: %s." % + ", ".join(media)) + raise cherrypy.HTTPError(406, msg) + diff --git a/src/cherrypy/lib/encoding.py b/src/cherrypy/lib/encoding.py new file mode 100644 index 0000000000..b5b77f0d28 --- /dev/null +++ b/src/cherrypy/lib/encoding.py @@ -0,0 +1,265 @@ +import struct +import time + +import cherrypy + + +def decode(encoding=None, default_encoding='utf-8'): + """Decode cherrypy.request.params from str to unicode objects.""" + if not encoding: + ct = cherrypy.request.headers.elements("Content-Type") + if ct: + ct = ct[0] + encoding = ct.params.get("charset", None) + if (not encoding) and ct.value.lower().startswith("text/"): + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + # When no explicit charset parameter is provided by the + # sender, media subtypes of the "text" type are defined + # to have a default charset value of "ISO-8859-1" when + # received via HTTP. + encoding = "ISO-8859-1" + + if not encoding: + encoding = default_encoding + + try: + decode_params(encoding) + except UnicodeDecodeError: + # IE and Firefox don't supply a charset when submitting form + # params with a CT of application/x-www-form-urlencoded. + # So after all our guessing, it could *still* be wrong. + # Start over with ISO-8859-1, since that seems to be preferred. + decode_params("ISO-8859-1") + +def decode_params(encoding): + decoded_params = {} + for key, value in cherrypy.request.params.items(): + if not hasattr(value, 'file'): + # Skip the value if it is an uploaded file + if isinstance(value, list): + # value is a list: decode each element + value = [v.decode(encoding) for v in value] + elif isinstance(value, str): + # value is a regular string: decode it + value = value.decode(encoding) + decoded_params[key] = value + + # Decode all or nothing, so we can try again on error. + cherrypy.request.params = decoded_params + + +# Encoding + +def encode(encoding=None, errors='strict', text_only=True, add_charset=True): + # Guard against running twice + if getattr(cherrypy.request, "_encoding_attempted", False): + return + cherrypy.request._encoding_attempted = True + + ct = cherrypy.response.headers.elements("Content-Type") + if ct: + ct = ct[0] + if (not text_only) or ct.value.lower().startswith("text/"): + # Set "charset=..." param on response Content-Type header + ct.params['charset'] = find_acceptable_charset(encoding, errors=errors) + if add_charset: + cherrypy.response.headers["Content-Type"] = str(ct) + +def encode_stream(encoding, errors='strict'): + """Encode a streaming response body. + + Use a generator wrapper, and just pray it works as the stream is + being written out. 
+ """ + def encoder(body): + for chunk in body: + if isinstance(chunk, unicode): + chunk = chunk.encode(encoding, errors) + yield chunk + cherrypy.response.body = encoder(cherrypy.response.body) + return True + +def encode_string(encoding, errors='strict'): + """Encode a buffered response body.""" + try: + body = [] + for chunk in cherrypy.response.body: + if isinstance(chunk, unicode): + chunk = chunk.encode(encoding, errors) + body.append(chunk) + cherrypy.response.body = body + except (LookupError, UnicodeError): + return False + else: + return True + +def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'): + response = cherrypy.response + + if cherrypy.response.stream: + encoder = encode_stream + else: + response.collapse_body() + encoder = encode_string + if response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + # Encoded strings may be of different lengths from their + # unicode equivalents, and even from each other. For example: + # >>> t = u"\u7007\u3040" + # >>> len(t) + # 2 + # >>> len(t.encode("UTF-8")) + # 6 + # >>> len(t.encode("utf7")) + # 8 + del response.headers["Content-Length"] + + # Parse the Accept-Charset request header, and try to provide one + # of the requested charsets (in order of user preference). + encs = cherrypy.request.headers.elements('Accept-Charset') + charsets = [enc.value.lower() for enc in encs] + attempted_charsets = [] + + if encoding is not None: + # If specified, force this encoding to be used, or fail. + encoding = encoding.lower() + if (not charsets) or "*" in charsets or encoding in charsets: + if encoder(encoding, errors): + return encoding + else: + if not encs: + # Any character-set is acceptable. + if encoder(default_encoding, errors): + return default_encoding + else: + raise cherrypy.HTTPError(500, failmsg % default_encoding) + else: + if "*" not in charsets: + # If no "*" is present in an Accept-Charset field, then all + # character sets not explicitly mentioned get a quality + # value of 0, except for ISO-8859-1, which gets a quality + # value of 1 if not explicitly mentioned. + iso = 'iso-8859-1' + if iso not in charsets: + attempted_charsets.append(iso) + if encoder(iso, errors): + return iso + + for element in encs: + if element.qvalue > 0: + if element.value == "*": + # Matches any charset. Try our default. + if default_encoding not in attempted_charsets: + attempted_charsets.append(default_encoding) + if encoder(default_encoding, errors): + return default_encoding + else: + encoding = element.value + if encoding not in attempted_charsets: + attempted_charsets.append(encoding) + if encoder(encoding, errors): + return encoding + + # No suitable encoding found. + ac = cherrypy.request.headers.get('Accept-Charset') + if ac is None: + msg = "Your client did not send an Accept-Charset header." + else: + msg = "Your client sent this Accept-Charset header: %s." % ac + msg += " We tried these charsets: %s." % ", ".join(attempted_charsets) + raise cherrypy.HTTPError(406, msg) + + +# GZIP + +def compress(body, compress_level): + """Compress 'body' at the given compress_level.""" + import zlib + + yield '\037\213' # magic header + yield '\010' # compression method + yield '\0' + yield struct.pack(" 0 is present + * The 'identity' value is given with a qvalue > 0. 
+ """ + response = cherrypy.response + if not response.body: + # Response body is empty (might be a 304 for instance) + return + + # If returning cached content (which should already have been gzipped), + # don't re-zip. + if getattr(cherrypy.request, "cached", False): + return + + acceptable = cherrypy.request.headers.elements('Accept-Encoding') + if not acceptable: + # If no Accept-Encoding field is present in a request, + # the server MAY assume that the client will accept any + # content coding. In this case, if "identity" is one of + # the available content-codings, then the server SHOULD use + # the "identity" content-coding, unless it has additional + # information that a different content-coding is meaningful + # to the client. + return + + ct = response.headers.get('Content-Type').split(';')[0] + for coding in acceptable: + if coding.value == 'identity' and coding.qvalue != 0: + return + if coding.value in ('gzip', 'x-gzip'): + if coding.qvalue == 0: + return + if ct in mime_types: + # Return a generator that compresses the page + varies = response.headers.get("Vary", "") + varies = [x.strip() for x in varies.split(",") if x.strip()] + if "Accept-Encoding" not in varies: + varies.append("Accept-Encoding") + response.headers['Vary'] = ", ".join(varies) + + response.headers['Content-Encoding'] = 'gzip' + response.body = compress(response.body, compress_level) + if response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + return + cherrypy.HTTPError(406, "identity, gzip").set_response() diff --git a/src/cherrypy/lib/http.py b/src/cherrypy/lib/http.py new file mode 100644 index 0000000000..5449d55b7f --- /dev/null +++ b/src/cherrypy/lib/http.py @@ -0,0 +1,400 @@ +"""HTTP library functions.""" + +# This module contains functions for building an HTTP application +# framework: any one, not just one whose name starts with "Ch". ;) If you +# reference any modules from some popular framework inside *this* module, +# FuManChu will personally hang you up by your thumbs and submit you +# to a public caning. + +from BaseHTTPServer import BaseHTTPRequestHandler +response_codes = BaseHTTPRequestHandler.responses.copy() + +# From http://www.cherrypy.org/ticket/361 +response_codes[500] = ('Internal Server Error', + 'The server encountered an unexpected condition ' + 'which prevented it from fulfilling the request.') +response_codes[503] = ('Service Unavailable', + 'The server is currently unable to handle the ' + 'request due to a temporary overloading or ' + 'maintenance of the server.') + + +import cgi +import re +from rfc822 import formatdate as HTTPDate + + +def urljoin(*atoms): + """Return the given path *atoms, joined into a single URL. + + This will correctly join a SCRIPT_NAME and PATH_INFO into the + original URL, even if either atom is blank. + """ + url = "/".join([x for x in atoms if x]) + while "//" in url: + url = url.replace("//", "/") + # Special-case the final url of "", and return "/" instead. + return url or "/" + +def protocol_from_http(protocol_str): + """Return a protocol tuple from the given 'HTTP/x.y' string.""" + return int(protocol_str[5]), int(protocol_str[7]) + +def get_ranges(headervalue, content_length): + """Return a list of (start, stop) indices from a Range header, or None. + + Each (start, stop) tuple will be composed of two ints, which are suitable + for use in a slicing operation. 
That is, the header "Range: bytes=3-6", + if applied against a Python string, is requesting resource[3:7]. This + function will return the list [(3, 7)]. + + If this function returns an empty list, you should return HTTP 416. + """ + + if not headervalue: + return None + + result = [] + bytesunit, byteranges = headervalue.split("=", 1) + for brange in byteranges.split(","): + start, stop = [x.strip() for x in brange.split("-", 1)] + if start: + if not stop: + stop = content_length - 1 + start, stop = map(int, (start, stop)) + if start >= content_length: + # From rfc 2616 sec 14.16: + # "If the server receives a request (other than one + # including an If-Range request-header field) with an + # unsatisfiable Range request-header field (that is, + # all of whose byte-range-spec values have a first-byte-pos + # value greater than the current length of the selected + # resource), it SHOULD return a response code of 416 + # (Requested range not satisfiable)." + continue + if stop < start: + # From rfc 2616 sec 14.16: + # "If the server ignores a byte-range-spec because it + # is syntactically invalid, the server SHOULD treat + # the request as if the invalid Range header field + # did not exist. (Normally, this means return a 200 + # response containing the full entity)." + return None + result.append((start, stop + 1)) + else: + if not stop: + # See rfc quote above. + return None + # Negative subscript (last N bytes) + result.append((content_length - int(stop), content_length)) + + return result + + +class HeaderElement(object): + """An element (with parameters) from an HTTP header's element list.""" + + def __init__(self, value, params=None): + self.value = value + if params is None: + params = {} + self.params = params + + def __unicode__(self): + p = [";%s=%s" % (k, v) for k, v in self.params.iteritems()] + return u"%s%s" % (self.value, "".join(p)) + + def __str__(self): + return str(self.__unicode__()) + + def parse(elementstr): + """Transform 'token;key=val' to ('token', {'key': 'val'}).""" + # Split the element into a value and parameters. The 'value' may + # be of the form, "token=token", but we don't split that here. + atoms = [x.strip() for x in elementstr.split(";") if x.strip()] + initial_value = atoms.pop(0).strip() + params = {} + for atom in atoms: + atom = [x.strip() for x in atom.split("=", 1) if x.strip()] + key = atom.pop(0) + if atom: + val = atom[0] + else: + val = "" + params[key] = val + return initial_value, params + parse = staticmethod(parse) + + def from_str(cls, elementstr): + """Construct an instance from a string of the form 'token;key=val'.""" + ival, params = cls.parse(elementstr) + return cls(ival, params) + from_str = classmethod(from_str) + + +q_separator = re.compile(r'; *q *=') + +class AcceptElement(HeaderElement): + """An element (with parameters) from an Accept* header's element list. + + AcceptElement objects are comparable; the more-preferred object will be + "less than" the less-preferred object. They are also therefore sortable; + if you sort a list of AcceptElement objects, they will be listed in + priority order; the most preferred value will be first. Yes, it should + have been the other way around, but it's too late to fix now. + """ + + def from_str(cls, elementstr): + qvalue = None + # The first "q" parameter (if any) separates the initial + # media-range parameter(s) (if any) from the accept-params. 
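+ # e.g. "text/html;level=1;q=0.7" splits into the media-range
+ # "text/html;level=1" and the accept-params "0.7", which is then
+ # re-parsed as a HeaderElement so any q-value extensions survive.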
+ atoms = q_separator.split(elementstr, 1) + media_range = atoms.pop(0).strip() + if atoms: + # The qvalue for an Accept header can have extensions. The other + # headers cannot, but it's easier to parse them as if they did. + qvalue = HeaderElement.from_str(atoms[0].strip()) + + media_type, params = cls.parse(media_range) + if qvalue is not None: + params["q"] = qvalue + return cls(media_type, params) + from_str = classmethod(from_str) + + def qvalue(self): + val = self.params.get("q", "1") + if isinstance(val, HeaderElement): + val = val.value + return float(val) + qvalue = property(qvalue, doc="The qvalue, or priority, of this value.") + + def __cmp__(self, other): + diff = cmp(other.qvalue, self.qvalue) + if diff == 0: + diff = cmp(str(other), str(self)) + return diff + + +def header_elements(fieldname, fieldvalue): + """Return a HeaderElement list from a comma-separated header str.""" + + if not fieldvalue: + return None + headername = fieldname.lower() + + result = [] + for element in fieldvalue.split(","): + if headername.startswith("accept") or headername == 'te': + hv = AcceptElement.from_str(element) + else: + hv = HeaderElement.from_str(element) + result.append(hv) + + result.sort() + return result + +def decode_TEXT(value): + """Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").""" + from email.Header import decode_header + atoms = decode_header(value) + decodedvalue = "" + for atom, charset in atoms: + if charset is not None: + atom = atom.decode(charset) + decodedvalue += atom + return decodedvalue + +def valid_status(status): + """Return legal HTTP status Code, Reason-phrase and Message. + + The status arg must be an int, or a str that begins with an int. + + If status is an int, or a str and no reason-phrase is supplied, + a default reason-phrase will be provided. + """ + + if not status: + status = 200 + + status = str(status) + parts = status.split(" ", 1) + if len(parts) == 1: + # No reason supplied. + code, = parts + reason = None + else: + code, reason = parts + reason = reason.strip() + + try: + code = int(code) + except ValueError: + raise ValueError("Illegal response status from server " + "(%s is non-numeric)." % repr(code)) + + if code < 100 or code > 599: + raise ValueError("Illegal response status from server " + "(%s is out of range)." % repr(code)) + + if code not in response_codes: + # code is unknown but not illegal + default_reason, message = "", "" + else: + default_reason, message = response_codes[code] + + if reason is None: + reason = default_reason + + return code, reason, message + + +image_map_pattern = re.compile(r"[0-9]+,[0-9]+") + +def parse_query_string(query_string, keep_blank_values=True): + """Build a params dictionary from a query_string.""" + if image_map_pattern.match(query_string): + # Server-side image map. Map the coords to 'x' and 'y' + # (like CGI::Request does). 
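+ # e.g. a server-side image map query string of "154,32" becomes
+ # {'x': 154, 'y': 32}.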
+ pm = query_string.split(",") + pm = {'x': int(pm[0]), 'y': int(pm[1])} + else: + pm = cgi.parse_qs(query_string, keep_blank_values) + for key, val in pm.items(): + if len(val) == 1: + pm[key] = val[0] + return pm + +def params_from_CGI_form(form): + params = {} + for key in form.keys(): + value_list = form[key] + if isinstance(value_list, list): + params[key] = [] + for item in value_list: + if item.filename is not None: + value = item # It's a file upload + else: + value = item.value # It's a regular field + params[key].append(value) + else: + if value_list.filename is not None: + value = value_list # It's a file upload + else: + value = value_list.value # It's a regular field + params[key] = value + return params + + +class CaseInsensitiveDict(dict): + """A case-insensitive dict subclass. + + Each key is changed on entry to str(key).title(). + """ + + def __getitem__(self, key): + return dict.__getitem__(self, str(key).title()) + + def __setitem__(self, key, value): + dict.__setitem__(self, str(key).title(), value) + + def __delitem__(self, key): + dict.__delitem__(self, str(key).title()) + + def __contains__(self, key): + return dict.__contains__(self, str(key).title()) + + def get(self, key, default=None): + return dict.get(self, str(key).title(), default) + + def has_key(self, key): + return dict.has_key(self, str(key).title()) + + def update(self, E): + for k in E.keys(): + self[str(k).title()] = E[k] + + def fromkeys(cls, seq, value=None): + newdict = cls() + for k in seq: + newdict[str(k).title()] = value + return newdict + fromkeys = classmethod(fromkeys) + + def setdefault(self, key, x=None): + key = str(key).title() + try: + return self[key] + except KeyError: + self[key] = x + return x + + def pop(self, key, default): + return dict.pop(self, str(key).title(), default) + + +class HeaderMap(CaseInsensitiveDict): + """A dict subclass for HTTP request and response headers. + + Each key is changed on entry to str(key).title(). This allows headers + to be case-insensitive and avoid duplicates. + + Values are header values (decoded according to RFC 2047 if necessary). + """ + + def elements(self, key): + """Return a list of HeaderElements for the given header (or None).""" + key = str(key).title() + h = self.get(key) + if h is None: + return [] + return header_elements(key, h) + + def output(self, protocol=(1, 1)): + """Transform self into a list of (name, value) tuples.""" + header_list = [] + for key, v in self.iteritems(): + if isinstance(v, unicode): + # HTTP/1.0 says, "Words of *TEXT may contain octets + # from character sets other than US-ASCII." and + # "Recipients of header field TEXT containing octets + # outside the US-ASCII character set may assume that + # they represent ISO-8859-1 characters." + try: + v = v.encode("iso-8859-1") + except UnicodeEncodeError: + if protocol >= (1, 1): + # Encode RFC-2047 TEXT + # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?="). + from email.Header import Header + v = Header(v, 'utf-8').encode() + else: + raise + else: + # This coercion should not take any time at all + # if value is already of type "str". + v = str(v) + header_list.append((key, v)) + return header_list + + + +class Host(object): + """An internet address. + + name should be the client's host name. If not available (because no DNS + lookup is performed), the IP address should be used instead. 
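+
+ For example (illustrative values), Host('192.168.1.10', 50000) leaves name
+ set to the IP string, whereas Host('192.168.1.10', 50000, 'client.example.com')
+ records the resolved host name as well.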
+ """ + + ip = "0.0.0.0" + port = 80 + name = "unknown.tld" + + def __init__(self, ip, port, name=None): + self.ip = ip + self.port = port + if name is None: + name = ip + self.name = name + + def __repr__(self): + return "http.Host(%r, %r, %r)" % (self.ip, self.port, self.name) diff --git a/src/cherrypy/lib/httpauth.py b/src/cherrypy/lib/httpauth.py new file mode 100644 index 0000000000..524db9a6b5 --- /dev/null +++ b/src/cherrypy/lib/httpauth.py @@ -0,0 +1,358 @@ +""" +httpauth modules defines functions to implement HTTP Digest Authentication (RFC 2617). +This has full compliance with 'Digest' and 'Basic' authentication methods. In +'Digest' it supports both MD5 and MD5-sess algorithms. + +Usage: + + First use 'doAuth' to request the client authentication for a + certain resource. You should send an httplib.UNAUTHORIZED response to the + client so he knows he has to authenticate itself. + + Then use 'parseAuthorization' to retrieve the 'auth_map' used in + 'checkResponse'. + + To use 'checkResponse' you must have already verified the password associated + with the 'username' key in 'auth_map' dict. Then you use the 'checkResponse' + function to verify if the password matches the one sent by the client. + +SUPPORTED_ALGORITHM - list of supported 'Digest' algorithms +SUPPORTED_QOP - list of supported 'Digest' 'qop'. +""" +__version__ = 1, 0, 1 +__author__ = "Tiago Cogumbreiro " +__credits__ = """ + Peter van Kampen for its recipe which implement most of Digest authentication: + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302378 +""" + +__license__ = """ +Copyright (c) 2005, Tiago Cogumbreiro +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Sylvain Hellegouarch nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" + +__all__ = ("digestAuth", "basicAuth", "doAuth", "checkResponse", + "parseAuthorization", "SUPPORTED_ALGORITHM", "md5SessionKey", + "calculateNonce", "SUPPORTED_QOP") + +################################################################################ +import md5 +import time +import base64 +import urllib2 + +MD5 = "MD5" +MD5_SESS = "MD5-sess" +AUTH = "auth" +AUTH_INT = "auth-int" + +SUPPORTED_ALGORITHM = (MD5, MD5_SESS) +SUPPORTED_QOP = (AUTH, AUTH_INT) + +################################################################################ +# doAuth +# +DIGEST_AUTH_ENCODERS = { + MD5: lambda val: md5.new (val).hexdigest (), + MD5_SESS: lambda val: md5.new (val).hexdigest (), +# SHA: lambda val: sha.new (val).hexdigest (), +} + +def calculateNonce (realm, algorithm = MD5): + """This is an auxaliary function that calculates 'nonce' value. It is used + to handle sessions.""" + + global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS + assert algorithm in SUPPORTED_ALGORITHM + + try: + encoder = DIGEST_AUTH_ENCODERS[algorithm] + except KeyError: + raise NotImplementedError ("The chosen algorithm (%s) does not have "\ + "an implementation yet" % algorithm) + + return encoder ("%d:%s" % (time.time(), realm)) + +def digestAuth (realm, algorithm = MD5, nonce = None, qop = AUTH): + """Challenges the client for a Digest authentication.""" + global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS, SUPPORTED_QOP + assert algorithm in SUPPORTED_ALGORITHM + assert qop in SUPPORTED_QOP + + if nonce is None: + nonce = calculateNonce (realm, algorithm) + + return 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % ( + realm, nonce, algorithm, qop + ) + +def basicAuth (realm): + """Challengenes the client for a Basic authentication.""" + assert '"' not in realm, "Realms cannot contain the \" (quote) character." + + return 'Basic realm="%s"' % realm + +def doAuth (realm): + """'doAuth' function returns the challenge string b giving priority over + Digest and fallback to Basic authentication when the browser doesn't + support the first one. + + This should be set in the HTTP header under the key 'WWW-Authenticate'.""" + + return digestAuth (realm) + " " + basicAuth (realm) + + +################################################################################ +# Parse authorization parameters +# +def _parseDigestAuthorization (auth_params): + # Convert the auth params to a dict + items = urllib2.parse_http_list (auth_params) + params = urllib2.parse_keqv_list (items) + + # Now validate the params + + # Check for required parameters + required = ["username", "realm", "nonce", "uri", "response"] + for k in required: + if not params.has_key(k): + return None + + # If qop is sent then cnonce and nc MUST be present + if params.has_key("qop") and not (params.has_key("cnonce") \ + and params.has_key("nc")): + return None + + # If qop is not sent, neither cnonce nor nc can be present + if (params.has_key("cnonce") or params.has_key("nc")) and \ + not params.has_key("qop"): + return None + + return params + + +def _parseBasicAuthorization (auth_params): + username, password = base64.decodestring (auth_params).split (":", 1) + return {"username": username, "password": password} + +AUTH_SCHEMES = { + "basic": _parseBasicAuthorization, + "digest": _parseDigestAuthorization, +} + +def parseAuthorization (credentials): + """parseAuthorization will convert the value of the 'Authorization' key in + the HTTP header to a map itself. If the parsing fails 'None' is returned. 
+ """ + + global AUTH_SCHEMES + + auth_scheme, auth_params = credentials.split(" ", 1) + auth_scheme = auth_scheme.lower () + + parser = AUTH_SCHEMES[auth_scheme] + params = parser (auth_params) + + if params is None: + return + + assert "auth_scheme" not in params + params["auth_scheme"] = auth_scheme + return params + + +################################################################################ +# Check provided response for a valid password +# +def md5SessionKey (params, password): + """ + If the "algorithm" directive's value is "MD5-sess", then A1 + [the session key] is calculated only once - on the first request by the + client following receipt of a WWW-Authenticate challenge from the server. + + This creates a 'session key' for the authentication of subsequent + requests and responses which is different for each "authentication + session", thus limiting the amount of material hashed with any one + key. + + Because the server need only use the hash of the user + credentials in order to create the A1 value, this construction could + be used in conjunction with a third party authentication service so + that the web server would not need the actual password value. The + specification of such a protocol is beyond the scope of this + specification. +""" + + keys = ("username", "realm", "nonce", "cnonce") + params_copy = {} + for key in keys: + params_copy[key] = params[key] + + params_copy["algorithm"] = MD5_SESS + return _A1 (params_copy, password) + +def _A1(params, password): + algorithm = params.get ("algorithm", MD5) + H = DIGEST_AUTH_ENCODERS[algorithm] + + if algorithm == MD5: + # If the "algorithm" directive's value is "MD5" or is + # unspecified, then A1 is: + # A1 = unq(username-value) ":" unq(realm-value) ":" passwd + return "%s:%s:%s" % (params["username"], params["realm"], password) + + elif algorithm == MD5_SESS: + + # This is A1 if qop is set + # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd ) + # ":" unq(nonce-value) ":" unq(cnonce-value) + h_a1 = H ("%s:%s:%s" % (params["username"], params["realm"], password)) + return "%s:%s:%s" % (h_a1, params["nonce"], params["cnonce"]) + + +def _A2(params, method, kwargs): + # If the "qop" directive's value is "auth" or is unspecified, then A2 is: + # A2 = Method ":" digest-uri-value + + qop = params.get ("qop", "auth") + if qop == "auth": + return method + ":" + params["uri"] + elif qop == "auth-int": + # If the "qop" value is "auth-int", then A2 is: + # A2 = Method ":" digest-uri-value ":" H(entity-body) + entity_body = kwargs.get ("entity_body", "") + H = kwargs["H"] + + return "%s:%s:%s" % ( + method, + params["uri"], + H(entity_body) + ) + + else: + raise NotImplementedError ("The 'qop' method is unknown: %s" % qop) + +def _computeDigestResponse(auth_map, password, method = "GET", A1 = None,**kwargs): + """ + Generates a response respecting the algorithm defined in RFC 2617 + """ + params = auth_map + + algorithm = params.get ("algorithm", MD5) + + H = DIGEST_AUTH_ENCODERS[algorithm] + KD = lambda secret, data: H(secret + ":" + data) + + qop = params.get ("qop", None) + + H_A2 = H(_A2(params, method, kwargs)) + + if algorithm == MD5_SESS and A1 is not None: + H_A1 = H(A1) + else: + H_A1 = H(_A1(params, password)) + + if qop == "auth" or aop == "auth-int": + # If the "qop" value is "auth" or "auth-int": + # request-digest = <"> < KD ( H(A1), unq(nonce-value) + # ":" nc-value + # ":" unq(cnonce-value) + # ":" unq(qop-value) + # ":" H(A2) + # ) <"> + request = "%s:%s:%s:%s:%s" % ( + params["nonce"], + 
params["nc"], + params["cnonce"], + params["qop"], + H_A2, + ) + + elif qop is None: + # If the "qop" directive is not present (this construction is + # for compatibility with RFC 2069): + # request-digest = + # <"> < KD ( H(A1), unq(nonce-value) ":" H(A2) ) > <"> + request = "%s:%s" % (params["nonce"], H_A2) + + return KD(H_A1, request) + +def _checkDigestResponse(auth_map, password, method = "GET", A1 = None, **kwargs): + """This function is used to verify the response given by the client when + he tries to authenticate. + Optional arguments: + entity_body - when 'qop' is set to 'auth-int' you MUST provide the + raw data you are going to send to the client (usually the + HTML page. + request_uri - the uri from the request line compared with the 'uri' + directive of the authorization map. They must represent + the same resource (unused at this time). + """ + + if auth_map['realm'] != kwargs.get('realm', None): + return False + + response = _computeDigestResponse(auth_map, password, method, A1,**kwargs) + + return response == auth_map["response"] + +def _checkBasicResponse (auth_map, password, method='GET', encrypt=None, **kwargs): + # Note that the Basic response doesn't provide the realm value so we cannot + # test it + try: + return encrypt(auth_map["password"], auth_map["username"]) == password + except TypeError: + return encrypt(auth_map["password"]) == password + +AUTH_RESPONSES = { + "basic": _checkBasicResponse, + "digest": _checkDigestResponse, +} + +def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs): + """'checkResponse' compares the auth_map with the password and optionally + other arguments that each implementation might need. + + If the response is of type 'Basic' then the function has the following + signature: + + checkBasicResponse (auth_map, password) -> bool + + If the response is of type 'Digest' then the function has the following + signature: + + checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool + + The 'A1' argument is only used in MD5_SESS algorithm based responses. + Check md5SessionKey() for more info. + """ + global AUTH_RESPONSES + checker = AUTH_RESPONSES[auth_map["auth_scheme"]] + return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs) + + + + diff --git a/src/cherrypy/lib/profiler.py b/src/cherrypy/lib/profiler.py new file mode 100644 index 0000000000..9d5481dd7e --- /dev/null +++ b/src/cherrypy/lib/profiler.py @@ -0,0 +1,191 @@ +"""Profiler tools for CherryPy. + +CherryPy users +============== + +You can profile any of your pages as follows: + + from cherrypy.lib import profiler + + class Root: + p = profile.Profiler("/path/to/profile/dir") + + def index(self): + self.p.run(self._index) + index.exposed = True + + def _index(self): + return "Hello, world!" + + cherrypy.tree.mount(Root()) + + +You can also turn on profiling for all requests +using the make_app function as WSGI middleware. + + +CherryPy developers +=================== + +This module can be used whenever you make changes to CherryPy, +to get a quick sanity-check on overall CP performance. Use the +"--profile" flag when running the test suite. Then, use the serve() +function to browse the results in a web browser. If you run this +module from the command line, it will call serve() for you. + +""" + + +# Make profiler output more readable by adding __init__ modules' parents. 
+def new_func_strip_path(func_name): + filename, line, name = func_name + if filename.endswith("__init__.py"): + return os.path.basename(filename[:-12]) + filename[-12:], line, name + return os.path.basename(filename), line, name + +try: + import profile + import pstats + pstats.func_strip_path = new_func_strip_path +except ImportError: + profile = None + pstats = None + import warnings + msg = ("Your installation of Python does not have a profile module. " + "If you're on Debian, you can apt-get python2.4-profiler from " + "non-free in a separate step. See http://www.cherrypy.org/wiki/" + "ProfilingOnDebian for details.") + warnings.warn(msg) + +import os, os.path +import sys + +try: + import cStringIO as StringIO +except ImportError: + import StringIO + + +_count = 0 + +class Profiler(object): + + def __init__(self, path=None): + if not path: + path = os.path.join(os.path.dirname(__file__), "profile") + self.path = path + if not os.path.exists(path): + os.makedirs(path) + + def run(self, func, *args, **params): + """Dump profile data into self.path.""" + global _count + c = _count = _count + 1 + path = os.path.join(self.path, "cp_%04d.prof" % c) + prof = profile.Profile() + result = prof.runcall(func, *args, **params) + prof.dump_stats(path) + return result + + def statfiles(self): + """statfiles() -> list of available profiles.""" + return [f for f in os.listdir(self.path) + if f.startswith("cp_") and f.endswith(".prof")] + + def stats(self, filename, sortby='cumulative'): + """stats(index) -> output of print_stats() for the given profile.""" + sio = StringIO.StringIO() + if sys.version_info >= (2, 5): + s = pstats.Stats(os.path.join(self.path, filename), stream=sio) + s.strip_dirs() + s.sort_stats(sortby) + s.print_stats() + else: + # pstats.Stats before Python 2.5 didn't take a 'stream' arg, + # but just printed to stdout. So re-route stdout. + s = pstats.Stats(os.path.join(self.path, filename)) + s.strip_dirs() + s.sort_stats(sortby) + oldout = sys.stdout + try: + sys.stdout = sio + s.print_stats() + finally: + sys.stdout = oldout + response = sio.getvalue() + sio.close() + return response + + def index(self): + return """ + CherryPy profile data + + + + + + """ + index.exposed = True + + def menu(self): + yield "

<h2>Profiling runs</h2>" + yield "<p>Click on one of the runs below to see profiling data.</p>" + runs = self.statfiles() + runs.sort() + for i in runs: + yield "<a href='report/%s' target='main'>%s</a><br />
" % (i, i) + menu.exposed = True + + def report(self, filename): + import cherrypy + cherrypy.response.headers['Content-Type'] = 'text/plain' + return self.stats(filename) + report.exposed = True + + +class ProfileAggregator(Profiler): + + def __init__(self, path=None): + Profiler.__init__(self, path) + global _count + self.count = _count = _count + 1 + self.profiler = profile.Profile() + + def run(self, func, *args): + path = os.path.join(self.path, "cp_%04d.prof" % self.count) + result = self.profiler.runcall(func, *args) + self.profiler.dump_stats(path) + return result + + +class make_app: + def __init__(self, nextapp, path=None, aggregate=False): + """Make a WSGI middleware app which wraps 'nextapp' with profiling.""" + self.nextapp = nextapp + self.aggregate = aggregate + if aggregate: + self.profiler = ProfileAggregator(path) + else: + self.profiler = Profiler(path) + + def __call__(self, environ, start_response): + def gather(): + result = [] + for line in self.nextapp(environ, start_response): + result.append(line) + return result + return self.profiler.run(gather) + + +def serve(path=None, port=8080): + import cherrypy + cherrypy.config.update({'server.socket_port': int(port), + 'server.thread_pool': 10, + 'environment': "production", + }) + cherrypy.quickstart(Profiler(path)) + + +if __name__ == "__main__": + serve(*tuple(sys.argv[1:])) + diff --git a/src/cherrypy/lib/safemime.py b/src/cherrypy/lib/safemime.py new file mode 100644 index 0000000000..cf41dbf27b --- /dev/null +++ b/src/cherrypy/lib/safemime.py @@ -0,0 +1,128 @@ +import cherrypy + + +class MultipartWrapper(object): + """Wraps a file-like object, returning '' when Content-Length is reached. + + The cgi module's logic for reading multipart MIME messages doesn't + allow the parts to know when the Content-Length for the entire message + has been reached, and doesn't allow for multipart-MIME messages that + omit the trailing CRLF (Flash 8's FileReference.upload(url), for example, + does this). The read_lines_to_outerboundary function gets stuck in a loop + until the socket times out. + + This rfile wrapper simply monitors the incoming stream. When a read is + attempted past the Content-Length, it returns an empty string rather + than timing out (of course, if the last read *overlaps* the C-L, you'll + get the last bit of data up to C-L, and then the next read will return + an empty string). + """ + + def __init__(self, rfile, clen): + self.rfile = rfile + self.clen = clen + self.bytes_read = 0 + + def read(self, size = None): + if self.clen: + # Return '' if we've read all the data. + if self.bytes_read >= self.clen: + return '' + + # Reduce 'size' if it's over our limit. + new_bytes_read = self.bytes_read + size + if new_bytes_read > self.clen: + size = self.clen - self.bytes_read + + data = self.rfile.read(size) + self.bytes_read += len(data) + return data + + def readline(self, size = None): + if size is not None: + if self.clen: + # Return '' if we've read all the data. + if self.bytes_read >= self.clen: + return '' + + # Reduce 'size' if it's over our limit. + new_bytes_read = self.bytes_read + size + if new_bytes_read > self.clen: + size = self.clen - self.bytes_read + + data = self.rfile.readline(size) + self.bytes_read += len(data) + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + size = 256 + while True: + if self.clen: + # Return if we've read all the data. 
+ if self.bytes_read >= self.clen: + return ''.join(res) + + # Reduce 'size' if it's over our limit. + new_bytes_read = self.bytes_read + size + if new_bytes_read > self.clen: + size = self.clen - self.bytes_read + + data = self.rfile.readline(size) + self.bytes_read += len(data) + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < size or data[-1:] == "\n": + return ''.join(res) + + def readlines(self, sizehint = 0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self.rfile + + def next(self): + if self.clen: + # Return '' if we've read all the data. + if self.bytes_read >= self.clen: + return '' + + data = self.rfile.next() + self.bytes_read += len(data) + return data + + +def safe_multipart(flash_only=False): + """Wrap request.rfile in a reader that won't crash on no trailing CRLF.""" + h = cherrypy.request.headers + if not h.get('Content-Type').startswith('multipart/'): + return + if flash_only and not 'Shockwave Flash' in h.get('User-Agent', ''): + return + + clen = h.get('Content-Length', '0') + try: + clen = int(clen) + except ValueError: + return + cherrypy.request.rfile = MultipartWrapper(cherrypy.request.rfile, clen) + +def init(): + """Create a Tool for safe_multipart and add it to cherrypy.tools.""" + cherrypy.tools.safe_multipart = cherrypy.Tool('before_request_body', + safe_multipart) + diff --git a/src/cherrypy/lib/sessions.py b/src/cherrypy/lib/sessions.py new file mode 100644 index 0000000000..fe842192f3 --- /dev/null +++ b/src/cherrypy/lib/sessions.py @@ -0,0 +1,679 @@ +"""Session implementation for CherryPy. + +We use cherrypy.request to store some convenient variables as +well as data about the session for the current request. Instead of +polluting cherrypy.request we use a Session object bound to +cherrypy.session to store these variables. +""" + +import datetime +import os +try: + import cPickle as pickle +except ImportError: + import pickle +import random +import sha +import time +import threading +import types +from warnings import warn + +import cherrypy +from cherrypy.lib import http + + +missing = object() + +class Session(object): + """A CherryPy dict-like Session object (one per request).""" + + __metaclass__ = cherrypy._AttributeDocstrings + + _id = None + id_observers = None + id_observers__doc = "A list of callbacks to which to pass new id's." + + id__doc = "The current session ID." + def _get_id(self): + return self._id + def _set_id(self, value): + self._id = value + for o in self.id_observers: + o(value) + id = property(_get_id, _set_id, doc=id__doc) + + timeout = 60 + timeout__doc = "Number of minutes after which to delete session data." + + locked = False + locked__doc = """ + If True, this session instance has exclusive read/write access + to session data.""" + + loaded = False + loaded__doc = """ + If True, data has been retrieved from storage. This should happen + automatically on the first attempt to access session data.""" + + clean_thread = None + clean_thread__doc = "Class-level Monitor which calls self.clean_up." + + clean_freq = 5 + clean_freq__doc = "The poll rate for expired session cleanup in minutes." 
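The Session object defined in this file is exposed to handler code as the dict-like cherrypy.session proxy (see init() further down). A short usage sketch, with config keys following the tools.sessions.* convention that expire() below also relies on; the Root class, mount point, and storage path are placeholders, not part of this commit:

    import cherrypy

    class Root(object):
        def index(self):
            # First access lazily loads the stored session data;
            # the updated count is persisted when the request ends.
            count = cherrypy.session.get('count', 0) + 1
            cherrypy.session['count'] = count
            return "You have viewed this page %d times." % count
        index.exposed = True

    conf = {'/': {'tools.sessions.on': True,
                  'tools.sessions.storage_type': 'file',
                  'tools.sessions.storage_path': '/tmp/calibre-sessions',
                  'tools.sessions.timeout': 60}}
    cherrypy.quickstart(Root(), '/', conf)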
+ + def __init__(self, id=None, **kwargs): + self.id_observers = [] + self._data = {} + + for k, v in kwargs.iteritems(): + setattr(self, k, v) + + if id is None: + self.regenerate() + else: + self.id = id + if not self._exists(): + # Expired or malicious session. Make a new one. + # See http://www.cherrypy.org/ticket/709. + self.id = None + self.regenerate() + + def regenerate(self): + """Replace the current session (with a new id).""" + if self.id is not None: + self.delete() + + old_session_was_locked = self.locked + if old_session_was_locked: + self.release_lock() + + self.id = None + while self.id is None: + self.id = self.generate_id() + # Assert that the generated id is not already stored. + if self._exists(): + self.id = None + + if old_session_was_locked: + self.acquire_lock() + + def clean_up(self): + """Clean up expired sessions.""" + pass + + try: + os.urandom(20) + except (AttributeError, NotImplementedError): + # os.urandom not available until Python 2.4. Fall back to random.random. + def generate_id(self): + """Return a new session id.""" + return sha.new('%s' % random.random()).hexdigest() + else: + def generate_id(self): + """Return a new session id.""" + return os.urandom(20).encode('hex') + + def save(self): + """Save session data.""" + try: + # If session data has never been loaded then it's never been + # accessed: no need to delete it + if self.loaded: + t = datetime.timedelta(seconds = self.timeout * 60) + expiration_time = datetime.datetime.now() + t + self._save(expiration_time) + + finally: + if self.locked: + # Always release the lock if the user didn't release it + self.release_lock() + + def load(self): + """Copy stored session data into this session instance.""" + data = self._load() + # data is either None or a tuple (session_data, expiration_time) + if data is None or data[1] < datetime.datetime.now(): + # Expired session: flush session data + self._data = {} + else: + self._data = data[0] + self.loaded = True + + # Stick the clean_thread in the class, not the instance. + # The instances are created and destroyed per-request. + cls = self.__class__ + if not cls.clean_thread: + # clean_up is in instancemethod and not a classmethod, + # so that tool config can be accessed inside the method. + t = cherrypy.process.plugins.Monitor( + cherrypy.engine, self.clean_up, self.clean_freq * 60) + t.subscribe() + cls.clean_thread = t + t.start() + + def delete(self): + """Delete stored session data.""" + self._delete() + + def __getitem__(self, key): + if not self.loaded: self.load() + return self._data[key] + + def __setitem__(self, key, value): + if not self.loaded: self.load() + self._data[key] = value + + def __delitem__(self, key): + if not self.loaded: self.load() + del self._data[key] + + def pop(self, key, default=missing): + """Remove the specified key and return the corresponding value. + If key is not found, default is returned if given, + otherwise KeyError is raised. + """ + if not self.loaded: self.load() + if default is missing: + return self._data.pop(key) + else: + return self._data.pop(key, default) + + def __contains__(self, key): + if not self.loaded: self.load() + return key in self._data + + def has_key(self, key): + """D.has_key(k) -> True if D has a key k, else False.""" + if not self.loaded: self.load() + return self._data.has_key(key) + + def get(self, key, default=None): + """D.get(k[,d]) -> D[k] if k in D, else d. 
d defaults to None.""" + if not self.loaded: self.load() + return self._data.get(key, default) + + def update(self, d): + """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k].""" + if not self.loaded: self.load() + self._data.update(d) + + def setdefault(self, key, default=None): + """D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D.""" + if not self.loaded: self.load() + return self._data.setdefault(key, default) + + def clear(self): + """D.clear() -> None. Remove all items from D.""" + if not self.loaded: self.load() + self._data.clear() + + def keys(self): + """D.keys() -> list of D's keys.""" + if not self.loaded: self.load() + return self._data.keys() + + def items(self): + """D.items() -> list of D's (key, value) pairs, as 2-tuples.""" + if not self.loaded: self.load() + return self._data.items() + + def values(self): + """D.values() -> list of D's values.""" + if not self.loaded: self.load() + return self._data.values() + + +class RamSession(Session): + + # Class-level objects. Don't rebind these! + cache = {} + locks = {} + + def clean_up(self): + """Clean up expired sessions.""" + now = datetime.datetime.now() + for id, (data, expiration_time) in self.cache.items(): + if expiration_time < now: + try: + del self.cache[id] + except KeyError: + pass + try: + del self.locks[id] + except KeyError: + pass + + def _exists(self): + return self.id in self.cache + + def _load(self): + return self.cache.get(self.id) + + def _save(self, expiration_time): + self.cache[self.id] = (self._data, expiration_time) + + def _delete(self): + del self.cache[self.id] + + def acquire_lock(self): + """Acquire an exclusive lock on the currently-loaded session data.""" + self.locked = True + self.locks.setdefault(self.id, threading.RLock()).acquire() + + def release_lock(self): + """Release the lock on the currently-loaded session data.""" + self.locks[self.id].release() + self.locked = False + + def __len__(self): + """Return the number of active sessions.""" + return len(self.cache) + + +class FileSession(Session): + """Implementation of the File backend for sessions + + storage_path: the folder where session data will be saved. Each session + will be saved as pickle.dump(data, expiration_time) in its own file; + the filename will be self.SESSION_PREFIX + self.id. + """ + + SESSION_PREFIX = 'session-' + LOCK_SUFFIX = '.lock' + + def setup(cls, **kwargs): + """Set up the storage system for file-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). + """ + if 'storage_path' in kwargs: + kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) + + for k, v in kwargs.iteritems(): + setattr(cls, k, v) + + # Warn if any lock files exist at startup. + lockfiles = [fname for fname in os.listdir(cls.storage_path) + if (fname.startswith(cls.SESSION_PREFIX) + and fname.endswith(cls.LOCK_SUFFIX))] + if lockfiles: + plural = ('', 's')[len(lockfiles) > 1] + warn("%s session lockfile%s found at startup. If you are " + "only running one process, then you may need to " + "manually delete the lockfiles found at %r." 
+ % (len(lockfiles), plural, cls.storage_path)) + setup = classmethod(setup) + + def _get_file_path(self): + f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id) + if not os.path.abspath(f).startswith(self.storage_path): + raise cherrypy.HTTPError(400, "Invalid session id in cookie.") + return f + + def _exists(self): + path = self._get_file_path() + return os.path.exists(path) + + def _load(self, path=None): + if path is None: + path = self._get_file_path() + try: + f = open(path, "rb") + try: + return pickle.load(f) + finally: + f.close() + except (IOError, EOFError): + return None + + def _save(self, expiration_time): + f = open(self._get_file_path(), "wb") + try: + pickle.dump((self._data, expiration_time), f) + finally: + f.close() + + def _delete(self): + try: + os.unlink(self._get_file_path()) + except OSError: + pass + + def acquire_lock(self, path=None): + """Acquire an exclusive lock on the currently-loaded session data.""" + if path is None: + path = self._get_file_path() + path += self.LOCK_SUFFIX + while True: + try: + lockfd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL) + except OSError: + time.sleep(0.1) + else: + os.close(lockfd) + break + self.locked = True + + def release_lock(self, path=None): + """Release the lock on the currently-loaded session data.""" + if path is None: + path = self._get_file_path() + os.unlink(path + self.LOCK_SUFFIX) + self.locked = False + + def clean_up(self): + """Clean up expired sessions.""" + now = datetime.datetime.now() + # Iterate over all session files in self.storage_path + for fname in os.listdir(self.storage_path): + if (fname.startswith(self.SESSION_PREFIX) + and not fname.endswith(self.LOCK_SUFFIX)): + # We have a session file: lock and load it and check + # if it's expired. If it fails, nevermind. + path = os.path.join(self.storage_path, fname) + self.acquire_lock(path) + try: + contents = self._load(path) + # _load returns None on IOError + if contents is not None: + data, expiration_time = contents + if expiration_time < now: + # Session expired: deleting it + os.unlink(path) + finally: + self.release_lock(path) + + def __len__(self): + """Return the number of active sessions.""" + return len([fname for fname in os.listdir(self.storage_path) + if (fname.startswith(self.SESSION_PREFIX) + and not fname.endswith(self.LOCK_SUFFIX))]) + + +class PostgresqlSession(Session): + """ Implementation of the PostgreSQL backend for sessions. It assumes + a table like this: + + create table session ( + id varchar(40), + data text, + expiration_time timestamp + ) + + You must provide your own get_db function. + """ + + def __init__(self, id=None, **kwargs): + Session.__init__(self, id, **kwargs) + self.cursor = self.db.cursor() + + def setup(cls, **kwargs): + """Set up the storage system for Postgres-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). 
+ """ + for k, v in kwargs.iteritems(): + setattr(cls, k, v) + + self.db = self.get_db() + setup = classmethod(setup) + + def __del__(self): + if self.cursor: + self.cursor.close() + self.db.commit() + + def _exists(self): + # Select session data from table + self.cursor.execute('select data, expiration_time from session ' + 'where id=%s', (self.id,)) + rows = self.cursor.fetchall() + return bool(rows) + + def _load(self): + # Select session data from table + self.cursor.execute('select data, expiration_time from session ' + 'where id=%s', (self.id,)) + rows = self.cursor.fetchall() + if not rows: + return None + + pickled_data, expiration_time = rows[0] + data = pickle.loads(pickled_data) + return data, expiration_time + + def _save(self, expiration_time): + pickled_data = pickle.dumps(self._data) + self.cursor.execute('update session set data = %s, ' + 'expiration_time = %s where id = %s', + (pickled_data, expiration_time, self.id)) + + def _delete(self): + self.cursor.execute('delete from session where id=%s', (self.id,)) + + def acquire_lock(self): + """Acquire an exclusive lock on the currently-loaded session data.""" + # We use the "for update" clause to lock the row + self.locked = True + self.cursor.execute('select id from session where id=%s for update', + (self.id,)) + + def release_lock(self): + """Release the lock on the currently-loaded session data.""" + # We just close the cursor and that will remove the lock + # introduced by the "for update" clause + self.cursor.close() + self.locked = False + + def clean_up(self): + """Clean up expired sessions.""" + self.cursor.execute('delete from session where expiration_time < %s', + (datetime.datetime.now(),)) + + +class MemcachedSession(Session): + + # The most popular memcached client for Python isn't thread-safe. + # Wrap all .get and .set operations in a single lock. + mc_lock = threading.RLock() + + # This is a seperate set of locks per session id. + locks = {} + + servers = ['127.0.0.1:11211'] + + def setup(cls, **kwargs): + """Set up the storage system for memcached-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). + """ + for k, v in kwargs.iteritems(): + setattr(cls, k, v) + + import memcache + cls.cache = memcache.Client(cls.servers) + setup = classmethod(setup) + + def _exists(self): + self.mc_lock.acquire() + try: + return bool(self.cache.get(self.id)) + finally: + self.mc_lock.release() + + def _load(self): + self.mc_lock.acquire() + try: + return self.cache.get(self.id) + finally: + self.mc_lock.release() + + def _save(self, expiration_time): + # Send the expiration time as "Unix time" (seconds since 1/1/1970) + td = int(time.mktime(expiration_time.timetuple())) + self.mc_lock.acquire() + try: + if not self.cache.set(self.id, (self._data, expiration_time), td): + raise AssertionError("Session data for id %r not set." 
% self.id) + finally: + self.mc_lock.release() + + def _delete(self): + self.cache.delete(self.id) + + def acquire_lock(self): + """Acquire an exclusive lock on the currently-loaded session data.""" + self.locked = True + self.locks.setdefault(self.id, threading.RLock()).acquire() + + def release_lock(self): + """Release the lock on the currently-loaded session data.""" + self.locks[self.id].release() + self.locked = False + + def __len__(self): + """Return the number of active sessions.""" + raise NotImplementedError + + +# Hook functions (for CherryPy tools) + +def save(): + """Save any changed session data.""" + + if not hasattr(cherrypy.serving, "session"): + return + + # Guard against running twice + if hasattr(cherrypy.request, "_sessionsaved"): + return + cherrypy.request._sessionsaved = True + + if cherrypy.response.stream: + # If the body is being streamed, we have to save the data + # *after* the response has been written out + cherrypy.request.hooks.attach('on_end_request', cherrypy.session.save) + else: + # If the body is not being streamed, we save the data now + # (so we can release the lock). + if isinstance(cherrypy.response.body, types.GeneratorType): + cherrypy.response.collapse_body() + cherrypy.session.save() +save.failsafe = True + +def close(): + """Close the session object for this request.""" + sess = getattr(cherrypy.serving, "session", None) + if getattr(sess, "locked", False): + # If the session is still locked we release the lock + sess.release_lock() +close.failsafe = True +close.priority = 90 + + +def init(storage_type='ram', path=None, path_header=None, name='session_id', + timeout=60, domain=None, secure=False, clean_freq=5, **kwargs): + """Initialize session object (using cookies). + + storage_type: one of 'ram', 'file', 'postgresql'. This will be used + to look up the corresponding class in cherrypy.lib.sessions + globals. For example, 'file' will use the FileSession class. + path: the 'path' value to stick in the response cookie metadata. + path_header: if 'path' is None (the default), then the response + cookie 'path' will be pulled from request.headers[path_header]. + name: the name of the cookie. + timeout: the expiration timeout (in minutes) for both the cookie and + stored session data. + domain: the cookie domain. + secure: if False (the default) the cookie 'secure' value will not + be set. If True, the cookie 'secure' value will be set (to 1). + clean_freq (minutes): the poll rate for expired session cleanup. + + Any additional kwargs will be bound to the new Session instance, + and may be specific to the storage type. See the subclass of Session + you're using for more information. + """ + + request = cherrypy.request + + # Guard against running twice + if hasattr(request, "_session_init_flag"): + return + request._session_init_flag = True + + # Check if request came with a session ID + id = None + if name in request.cookie: + id = request.cookie[name].value + + # Find the storage class and call setup (first time only). + storage_class = storage_type.title() + 'Session' + storage_class = globals()[storage_class] + if not hasattr(cherrypy, "session"): + if hasattr(storage_class, "setup"): + storage_class.setup(**kwargs) + + # Create and attach a new Session instance to cherrypy.serving. + # It will possess a reference to (and lock, and lazily load) + # the requested session data. 
+ kwargs['timeout'] = timeout + kwargs['clean_freq'] = clean_freq + cherrypy.serving.session = sess = storage_class(id, **kwargs) + def update_cookie(id): + """Update the cookie every time the session id changes.""" + cherrypy.response.cookie[name] = id + sess.id_observers.append(update_cookie) + + # Create cherrypy.session which will proxy to cherrypy.serving.session + if not hasattr(cherrypy, "session"): + cherrypy.session = cherrypy._ThreadLocalProxy('session') + + set_response_cookie(path=path, path_header=path_header, name=name, + timeout=timeout, domain=domain, secure=secure) + + +def set_response_cookie(path=None, path_header=None, name='session_id', + timeout=60, domain=None, secure=False): + """Set a response cookie for the client. + + path: the 'path' value to stick in the response cookie metadata. + path_header: if 'path' is None (the default), then the response + cookie 'path' will be pulled from request.headers[path_header]. + name: the name of the cookie. + timeout: the expiration timeout for the cookie. + domain: the cookie domain. + secure: if False (the default) the cookie 'secure' value will not + be set. If True, the cookie 'secure' value will be set (to 1). + """ + # Set response cookie + cookie = cherrypy.response.cookie + cookie[name] = cherrypy.serving.session.id + cookie[name]['path'] = (path or cherrypy.request.headers.get(path_header) + or '/') + + # We'd like to use the "max-age" param as indicated in + # http://www.faqs.org/rfcs/rfc2109.html but IE doesn't + # save it to disk and the session is lost if people close + # the browser. So we have to use the old "expires" ... sigh ... +## cookie[name]['max-age'] = timeout * 60 + if timeout: + cookie[name]['expires'] = http.HTTPDate(time.time() + (timeout * 60)) + if domain is not None: + cookie[name]['domain'] = domain + if secure: + cookie[name]['secure'] = 1 + + +def expire(): + """Expire the current session cookie.""" + name = cherrypy.request.config.get('tools.sessions.name', 'session_id') + one_year = 60 * 60 * 24 * 365 + exp = time.gmtime(time.time() - one_year) + t = time.strftime("%a, %d-%b-%Y %H:%M:%S GMT", exp) + cherrypy.response.cookie[name]['expires'] = t + + diff --git a/src/cherrypy/lib/static.py b/src/cherrypy/lib/static.py new file mode 100644 index 0000000000..e9df62f7bd --- /dev/null +++ b/src/cherrypy/lib/static.py @@ -0,0 +1,227 @@ +import mimetypes +mimetypes.init() +mimetypes.types_map['.dwg']='image/x-dwg' +mimetypes.types_map['.ico']='image/x-icon' + +import os +import re +import stat +import time +import urllib + +import cherrypy +from cherrypy.lib import cptools, http + + +def serve_file(path, content_type=None, disposition=None, name=None): + """Set status, headers, and body in order to serve the given file. + + The Content-Type header will be set to the content_type arg, if provided. + If not provided, the Content-Type will be guessed by the file extension + of the 'path' argument. + + If disposition is not None, the Content-Disposition header will be set + to "; filename=". If name is None, it will be set + to the basename of path. If disposition is None, no Content-Disposition + header will be written. + """ + + response = cherrypy.response + + # If path is relative, users should fix it by making path absolute. + # That is, CherryPy should not guess where the application root is. + # It certainly should *not* use cwd (since CP may be invoked from a + # variety of paths). 
If using tools.static, you can make your relative + # paths become absolute by supplying a value for "tools.static.root". + if not os.path.isabs(path): + raise ValueError("'%s' is not an absolute path." % path) + + try: + st = os.stat(path) + except OSError: + raise cherrypy.NotFound() + + # Check if path is a directory. + if stat.S_ISDIR(st.st_mode): + # Let the caller deal with it as they like. + raise cherrypy.NotFound() + + # Set the Last-Modified response header, so that + # modified-since validation code can work. + response.headers['Last-Modified'] = http.HTTPDate(st.st_mtime) + cptools.validate_since() + + if content_type is None: + # Set content-type based on filename extension + ext = "" + i = path.rfind('.') + if i != -1: + ext = path[i:].lower() + content_type = mimetypes.types_map.get(ext, "text/plain") + response.headers['Content-Type'] = content_type + + if disposition is not None: + if name is None: + name = os.path.basename(path) + cd = '%s; filename="%s"' % (disposition, name) + response.headers["Content-Disposition"] = cd + + # Set Content-Length and use an iterable (file object) + # this way CP won't load the whole file in memory + c_len = st.st_size + bodyfile = open(path, 'rb') + + # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code + if cherrypy.request.protocol >= (1, 1): + response.headers["Accept-Ranges"] = "bytes" + r = http.get_ranges(cherrypy.request.headers.get('Range'), c_len) + if r == []: + response.headers['Content-Range'] = "bytes */%s" % c_len + message = "Invalid Range (first-byte-pos greater than Content-Length)" + raise cherrypy.HTTPError(416, message) + if r: + if len(r) == 1: + # Return a single-part response. + start, stop = r[0] + r_len = stop - start + response.status = "206 Partial Content" + response.headers['Content-Range'] = ("bytes %s-%s/%s" % + (start, stop - 1, c_len)) + response.headers['Content-Length'] = r_len + bodyfile.seek(start) + response.body = bodyfile.read(r_len) + else: + # Return a multipart/byteranges response. + response.status = "206 Partial Content" + import mimetools + boundary = mimetools.choose_boundary() + ct = "multipart/byteranges; boundary=%s" % boundary + response.headers['Content-Type'] = ct + if response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + + def file_ranges(): + # Apache compatibility: + yield "\r\n" + + for start, stop in r: + yield "--" + boundary + yield "\r\nContent-type: %s" % content_type + yield ("\r\nContent-range: bytes %s-%s/%s\r\n\r\n" + % (start, stop - 1, c_len)) + bodyfile.seek(start) + yield bodyfile.read(stop - start) + yield "\r\n" + # Final boundary + yield "--" + boundary + "--" + + # Apache compatibility: + yield "\r\n" + response.body = file_ranges() + else: + response.headers['Content-Length'] = c_len + response.body = bodyfile + else: + response.headers['Content-Length'] = c_len + response.body = bodyfile + return response.body + +def serve_download(path, name=None): + """Serve 'path' as an application/x-download attachment.""" + # This is such a common idiom I felt it deserved its own wrapper. 
+ return serve_file(path, "application/x-download", "attachment", name) + + +def _attempt(filename, content_types): + try: + # you can set the content types for a + # complete directory per extension + content_type = None + if content_types: + r, ext = os.path.splitext(filename) + content_type = content_types.get(ext[1:], None) + serve_file(filename, content_type=content_type) + return True + except cherrypy.NotFound: + # If we didn't find the static file, continue handling the + # request. We might find a dynamic handler instead. + return False + +def staticdir(section, dir, root="", match="", content_types=None, index=""): + """Serve a static resource from the given (root +) dir. + + If 'match' is given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + If content_types is given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). + + If 'index' is provided, it should be the (relative) name of a file to + serve for directory requests. For example, if the dir argument is + '/home/me', the Request-URI is 'myapp', and the index arg is + 'index.html', the file '/home/me/myapp/index.html' will be sought. + """ + if match and not re.search(match, cherrypy.request.path_info): + return False + + # Allow the use of '~' to refer to a user's home directory. + dir = os.path.expanduser(dir) + + # If dir is relative, make absolute using "root". + if not os.path.isabs(dir): + if not root: + msg = "Static dir requires an absolute dir (or root)." + raise ValueError(msg) + dir = os.path.join(root, dir) + + # Determine where we are in the object tree relative to 'section' + # (where the static tool was defined). + if section == 'global': + section = "/" + section = section.rstrip(r"\/") + branch = cherrypy.request.path_info[len(section) + 1:] + branch = urllib.unquote(branch.lstrip(r"\/")) + + # If branch is "", filename will end in a slash + filename = os.path.join(dir, branch) + + # There's a chance that the branch pulled from the URL might + # have ".." or similar uplevel attacks in it. Check that the final + # filename is a child of dir. + if not os.path.normpath(filename).startswith(os.path.normpath(dir)): + raise cherrypy.HTTPError(403) # Forbidden + + handled = _attempt(filename, content_types) + if not handled: + # Check for an index file if a folder was requested. + if index: + handled = _attempt(os.path.join(filename, index), content_types) + if handled: + cherrypy.request.is_index = filename[-1] in (r"\/") + return handled + +def staticfile(filename, root=None, match="", content_types=None): + """Serve a static resource from the given (root +) filename. + + If 'match' is given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + If content_types is given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). + """ + if match and not re.search(match, cherrypy.request.path_info): + return False + + # If filename is relative, make absolute using "root". + if not os.path.isabs(filename): + if not root: + msg = "Static tool requires an absolute filename (got '%s')." 
% filename + raise ValueError(msg) + filename = os.path.join(root, filename) + + return _attempt(filename, content_types) diff --git a/src/cherrypy/lib/tidy.py b/src/cherrypy/lib/tidy.py new file mode 100644 index 0000000000..ed337c6a1e --- /dev/null +++ b/src/cherrypy/lib/tidy.py @@ -0,0 +1,184 @@ +"""Functions to run cherrypy.response through Tidy or NSGML.""" + +import cgi +import os +import StringIO +import traceback + +import cherrypy + +def tidy(temp_dir, tidy_path, strict_xml=False, errors_to_ignore=None, + indent=False, wrap=False, warnings=True): + """Run cherrypy.response through Tidy. + + If either 'indent' or 'wrap' are specified, then response.body will be + set to the output of tidy. Otherwise, only errors (including warnings, + if warnings is True) will change the body. + + Note that we use the standalone Tidy tool rather than the python + mxTidy module. This is because this module does not seem to be + stable and it crashes on some HTML pages (which means that the + server would also crash) + """ + response = cherrypy.response + + # the tidy tool, by its very nature it's not generator friendly, + # so we just collapse the body and work with it. + orig_body = response.collapse_body() + + fct = response.headers.get('Content-Type', '') + ct = fct.split(';')[0] + encoding = '' + i = fct.find('charset=') + if i != -1: + encoding = fct[i + 8:] + + if ct == 'text/html': + page_file = os.path.join(temp_dir, 'page.html') + open(page_file, 'wb').write(orig_body) + + out_file = os.path.join(temp_dir, 'tidy.out') + err_file = os.path.join(temp_dir, 'tidy.err') + tidy_enc = encoding.replace('-', '') + if tidy_enc: + tidy_enc = '-' + tidy_enc + + strict_xml = ("", " -xml")[bool(strict_xml)] + + if indent: + indent = ' -indent' + else: + indent = '' + + if wrap is False: + wrap = '' + else: + try: + wrap = ' -wrap %d' % int(tidyWrap) + except: + wrap = '' + + result = os.system('"%s" %s%s%s%s -f %s -o %s %s' % + (tidy_path, tidy_enc, strict_xml, indent, wrap, + err_file, out_file, page_file)) + use_output = bool(indent or wrap) and not result + if use_output: + output = open(out_file, 'rb').read() + + new_errs = [] + for err in open(err_file, 'rb').read().splitlines(): + if (err.find('Error') != -1 or + (warnings and err.find('Warning') != -1)): + ignore = 0 + for err_ign in errors_to_ignore or []: + if err.find(err_ign) != -1: + ignore = 1 + break + if not ignore: + new_errs.append(err) + + if new_errs: + response.body = wrong_content('
<br />'.join(new_errs), orig_body) + if response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + return + elif strict_xml: + # The HTML is OK, but is it valid XML? + # Use elementtree to parse XML + from elementtree.ElementTree import parse + tag_list = ['nbsp', 'quot'] + for tag in tag_list: + orig_body = orig_body.replace('&' + tag + ';', tag.upper()) + + if encoding: + enctag = '<?xml version="1.0" encoding="%s"?>' % encoding + orig_body = enctag + orig_body + + f = StringIO.StringIO(orig_body) + try: + tree = parse(f) + except: + # Wrong XML + body_file = StringIO.StringIO() + traceback.print_exc(file = body_file) + body_file = '<br />
'.join(body_file.getvalue()) + response.body = wrong_content(body_file, orig_body, "XML") + if response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + return + + if use_output: + response.body = [output] + if response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + +def html_space(text): + """Escape text, replacing space with nbsp and tab with 4 nbsp's.""" + return cgi.escape(text).replace('\t', ' ').replace(' ', ' ') + +def html_break(text): + """Escape text, replacing newline with HTML br element.""" + return cgi.escape(text).replace('\n', '
<br />') + +def wrong_content(header, body, content_type="HTML"): + output = ["Wrong %s:<br />%s<br />" % (content_type, html_break(header))] + for i, line in enumerate(body.splitlines()): + output.append("%03d - %s" % (i + 1, html_space(line))) + return "<br />
".join(output) + + +def nsgmls(temp_dir, nsgmls_path, catalog_path, errors_to_ignore=None): + response = cherrypy.response + + # the tidy tool, by its very nature it's not generator friendly, + # so we just collect the body and work with it. + orig_body = response.collapse_body() + + fct = response.headers.get('Content-Type', '') + ct = fct.split(';')[0] + encoding = '' + i = fct.find('charset=') + if i != -1: + encoding = fct[i + 8:] + if ct == 'text/html': + # Remove bits of Javascript (nsgmls doesn't seem to handle + # them correctly (for instance, if ', i) + if j == -1: + break + orig_body = orig_body[:i] + orig_body[j+9:] + + page_file = os.path.join(temp_dir, 'page.html') + open(page_file, 'wb').write(orig_body) + + err_file = os.path.join(temp_dir, 'nsgmls.err') + command = ('%s -c%s -f%s -s -E10 %s' % + (nsgmls_path, catalog_path, err_file, page_file)) + command = command.replace('\\', '/') + os.system(command) + errs = open(err_file, 'rb').read() + + new_errs = [] + for err in errs.splitlines(): + ignore = False + for err_ign in errors_to_ignore or []: + if err.find(err_ign) != -1: + ignore = True + break + if not ignore: + new_errs.append(err) + + if new_errs: + response.body = wrong_content('
'.join(new_errs), orig_body) + if response.headers.has_key("Content-Length"): + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + diff --git a/src/cherrypy/lib/wsgiapp.py b/src/cherrypy/lib/wsgiapp.py new file mode 100644 index 0000000000..8aeb5755d9 --- /dev/null +++ b/src/cherrypy/lib/wsgiapp.py @@ -0,0 +1,77 @@ +"""A CherryPy tool for hosting a foreign WSGI application.""" + +import sys +import warnings + +import cherrypy + + +# is this sufficient for start_response? +def start_response(status, response_headers, exc_info=None): + cherrypy.response.status = status + headers_dict = dict(response_headers) + cherrypy.response.headers.update(headers_dict) + +def make_environ(): + """grabbed some of below from wsgiserver.py + + for hosting WSGI apps in non-WSGI environments (yikes!) + """ + + request = cherrypy.request + + # create and populate the wsgi environ + environ = dict() + environ["wsgi.version"] = (1,0) + environ["wsgi.url_scheme"] = request.scheme + environ["wsgi.input"] = request.rfile + environ["wsgi.errors"] = sys.stderr + environ["wsgi.multithread"] = True + environ["wsgi.multiprocess"] = False + environ["wsgi.run_once"] = False + environ["REQUEST_METHOD"] = request.method + environ["SCRIPT_NAME"] = request.script_name + environ["PATH_INFO"] = request.path_info + environ["QUERY_STRING"] = request.query_string + environ["SERVER_PROTOCOL"] = request.protocol + environ["SERVER_NAME"] = request.local.name + environ["SERVER_PORT"] = request.local.port + environ["REMOTE_HOST"] = request.remote.name + environ["REMOTE_ADDR"] = request.remote.ip + environ["REMOTE_PORT"] = request.remote.port + # then all the http headers + headers = request.headers + environ["CONTENT_TYPE"] = headers.get("Content-type", "") + environ["CONTENT_LENGTH"] = headers.get("Content-length", "") + for (k, v) in headers.iteritems(): + envname = "HTTP_" + k.upper().replace("-","_") + environ[envname] = v + return environ + + +def run(app, env=None): + """Run the given WSGI app and set response.body to its output.""" + warnings.warn("This module is deprecated and will be removed in " + "Cherrypy 3.2. See http://www.cherrypy.org/ticket/700 " + "for more information.") + + try: + environ = cherrypy.request.wsgi_environ.copy() + environ['SCRIPT_NAME'] = cherrypy.request.script_name + environ['PATH_INFO'] = cherrypy.request.path_info + except AttributeError: + environ = make_environ() + + if env: + environ.update(env) + + # run the wsgi app and have it set response.body + response = app(environ, start_response) + try: + cherrypy.response.body = [x for x in response] + finally: + if hasattr(response, "close"): + response.close() + + return True + diff --git a/src/cherrypy/lib/xmlrpc.py b/src/cherrypy/lib/xmlrpc.py new file mode 100644 index 0000000000..c95970f205 --- /dev/null +++ b/src/cherrypy/lib/xmlrpc.py @@ -0,0 +1,49 @@ +import sys + +import cherrypy + + +def process_body(): + """Return (params, method) from request body.""" + try: + import xmlrpclib + return xmlrpclib.loads(cherrypy.request.body.read()) + except Exception: + return ('ERROR PARAMS', ), 'ERRORMETHOD' + + +def patched_path(path): + """Return 'path', doctored for RPC.""" + if not path.endswith('/'): + path += '/' + if path.startswith('/RPC2/'): + # strip the first /rpc2 + path = path[5:] + return path + + +def _set_response(body): + # The XML-RPC spec (http://www.xmlrpc.com/spec) says: + # "Unless there's a lower-level error, always return 200 OK." 
+ # Since Python's xmlrpclib interprets a non-200 response + # as a "Protocol Error", we'll just return 200 every time. + response = cherrypy.response + response.status = '200 OK' + response.body = body + response.headers['Content-Type'] = 'text/xml' + response.headers['Content-Length'] = len(body) + + +def respond(body, encoding='utf-8', allow_none=0): + import xmlrpclib + if not isinstance(body, xmlrpclib.Fault): + body = (body,) + _set_response(xmlrpclib.dumps(body, methodresponse=1, + encoding=encoding, + allow_none=allow_none)) + +def on_error(): + body = str(sys.exc_info()[1]) + import xmlrpclib + _set_response(xmlrpclib.dumps(xmlrpclib.Fault(1, body))) + diff --git a/src/cherrypy/process/__init__.py b/src/cherrypy/process/__init__.py new file mode 100644 index 0000000000..f15b12370a --- /dev/null +++ b/src/cherrypy/process/__init__.py @@ -0,0 +1,14 @@ +"""Site container for an HTTP server. + +A Web Site Process Bus object is used to connect applications, servers, +and frameworks with site-wide services such as daemonization, process +reload, signal handling, drop privileges, PID file management, logging +for all of these, and many more. + +The 'plugins' module defines a few abstract and concrete services for +use with the bus. Some use tool-specific channels; see the documentation +for each class. +""" + +from cherrypy.process.wspbus import bus +from cherrypy.process import plugins, servers diff --git a/src/cherrypy/process/plugins.py b/src/cherrypy/process/plugins.py new file mode 100644 index 0000000000..7f3cde7ec8 --- /dev/null +++ b/src/cherrypy/process/plugins.py @@ -0,0 +1,524 @@ +"""Site services for use with a Web Site Process Bus.""" + +import os +import re +try: + set +except NameError: + from sets import Set as set +import signal as _signal +import sys +import time +import threading + + +class SimplePlugin(object): + """Plugin base class which auto-subscribes methods for known channels.""" + + def __init__(self, bus): + self.bus = bus + + def subscribe(self): + """Register this object as a (multi-channel) listener on the bus.""" + for channel in self.bus.listeners: + method = getattr(self, channel, None) + if method is not None: + self.bus.subscribe(channel, method) + + def unsubscribe(self): + """Unregister this object as a listener on the bus.""" + for channel in self.bus.listeners: + method = getattr(self, channel, None) + if method is not None: + self.bus.unsubscribe(channel, method) + + + +class SignalHandler(object): + """Register bus channels (and listeners) for system signals. + + By default, instantiating this object subscribes the following signals + and listeners: + + TERM: bus.exit + HUP : bus.restart + USR1: bus.graceful + """ + + # Map from signal numbers to names + signals = {} + for k, v in vars(_signal).items(): + if k.startswith('SIG') and not k.startswith('SIG_'): + signals[v] = k + del k, v + + def __init__(self, bus): + self.bus = bus + # Set default handlers + self.handlers = {'SIGTERM': self.bus.exit, + 'SIGHUP': self.handle_SIGHUP, + 'SIGUSR1': self.bus.graceful, + } + + self._previous_handlers = {} + + def subscribe(self): + for sig, func in self.handlers.iteritems(): + try: + self.set_handler(sig, func) + except ValueError: + pass + + def unsubscribe(self): + for signum, handler in self._previous_handlers.iteritems(): + signame = self.signals[signum] + + if handler is None: + self.bus.log("Restoring %s handler to SIG_DFL." % signame) + handler = _signal.SIG_DFL + else: + self.bus.log("Restoring %s handler %r." 
% (signame, handler)) + + try: + our_handler = _signal.signal(signum, handler) + if our_handler is None: + self.bus.log("Restored old %s handler %r, but our " + "handler was not registered." % + (signame, handler), level=30) + except ValueError: + self.bus.log("Unable to restore %s handler %r." % + (signame, handler), level=40, traceback=True) + + def set_handler(self, signal, listener=None): + """Subscribe a handler for the given signal (number or name). + + If the optional 'listener' argument is provided, it will be + subscribed as a listener for the given signal's channel. + + If the given signal name or number is not available on the current + platform, ValueError is raised. + """ + if isinstance(signal, basestring): + signum = getattr(_signal, signal, None) + if signum is None: + raise ValueError("No such signal: %r" % signal) + signame = signal + else: + try: + signame = self.signals[signal] + except KeyError: + raise ValueError("No such signal: %r" % signal) + signum = signal + + prev = _signal.signal(signum, self._handle_signal) + self._previous_handlers[signum] = prev + + if listener is not None: + self.bus.log("Listening for %s." % signame) + self.bus.subscribe(signame, listener) + + def _handle_signal(self, signum=None, frame=None): + """Python signal handler (self.set_handler subscribes it for you).""" + signame = self.signals[signum] + self.bus.log("Caught signal %s." % signame) + self.bus.publish(signame) + + def handle_SIGHUP(self): + if os.isatty(sys.stdin.fileno()): + # not daemonized (may be foreground or background) + self.bus.log("SIGHUP caught but not daemonized. Exiting.") + self.bus.exit() + else: + self.bus.log("SIGHUP caught while daemonized. Restarting.") + self.bus.restart() + + +try: + import pwd, grp +except ImportError: + pwd, grp = None, None + + +class DropPrivileges(SimplePlugin): + """Drop privileges. uid/gid arguments not available on Windows. + + Special thanks to Gavin Baker: http://antonym.org/node/100. 
+ """ + + def __init__(self, bus, umask=None, uid=None, gid=None): + SimplePlugin.__init__(self, bus) + self.finalized = False + self.uid = uid + self.gid = gid + self.umask = umask + + def _get_uid(self): + return self._uid + def _set_uid(self, val): + if val is not None: + if pwd is None: + self.bus.log("pwd module not available; ignoring uid.", + level=30) + val = None + elif isinstance(val, basestring): + val = pwd.getpwnam(val)[2] + self._uid = val + uid = property(_get_uid, _set_uid, doc="The uid under which to run.") + + def _get_gid(self): + return self._gid + def _set_gid(self, val): + if val is not None: + if grp is None: + self.bus.log("grp module not available; ignoring gid.", + level=30) + val = None + elif isinstance(val, basestring): + val = grp.getgrnam(val)[2] + self._gid = val + gid = property(_get_gid, _set_gid, doc="The gid under which to run.") + + def _get_umask(self): + return self._umask + def _set_umask(self, val): + if val is not None: + try: + os.umask + except AttributeError: + self.bus.log("umask function not available; ignoring umask.", + level=30) + val = None + self._umask = val + umask = property(_get_umask, _set_umask, doc="The umask under which to run.") + + def start(self): + # uid/gid + def current_ids(): + """Return the current (uid, gid) if available.""" + name, group = None, None + if pwd: + name = pwd.getpwuid(os.getuid())[0] + if grp: + group = grp.getgrgid(os.getgid())[0] + return name, group + + if self.finalized: + if not (self.uid is None and self.gid is None): + self.bus.log('Already running as uid: %r gid: %r' % + current_ids()) + else: + if self.uid is None and self.gid is None: + if pwd or grp: + self.bus.log('uid/gid not set', level=30) + else: + self.bus.log('Started as uid: %r gid: %r' % current_ids()) + if self.gid is not None: + os.setgid(gid) + if self.uid is not None: + os.setuid(uid) + self.bus.log('Running as uid: %r gid: %r' % current_ids()) + + # umask + if self.finalized: + if self.umask is not None: + self.bus.log('umask already set to: %03o' % self.umask) + else: + if self.umask is None: + self.bus.log('umask not set', level=30) + else: + old_umask = os.umask(self.umask) + self.bus.log('umask old: %03o, new: %03o' % + (old_umask, self.umask)) + + self.finalized = True + start.priority = 75 + + +class Daemonizer(SimplePlugin): + """Daemonize the running script. + + Use this with a Web Site Process Bus via: + + Daemonizer(bus).subscribe() + + When this component finishes, the process is completely decoupled from + the parent environment. Please note that when this component is used, + the return code from the parent process will still be 0 if a startup + error occurs in the forked children. Errors in the initial daemonizing + process still return proper exit codes. Therefore, if you use this + plugin to daemonize, don't use the return code as an accurate indicator + of whether the process fully started. In fact, that return code only + indicates if the process succesfully finished the first fork. + """ + + def __init__(self, bus, stdin='/dev/null', stdout='/dev/null', + stderr='/dev/null'): + SimplePlugin.__init__(self, bus) + self.stdin = stdin + self.stdout = stdout + self.stderr = stderr + self.finalized = False + + def start(self): + if self.finalized: + self.bus.log('Already deamonized.') + + # forking has issues with threads: + # http://www.opengroup.org/onlinepubs/000095399/functions/fork.html + # "The general problem with making fork() work in a multi-threaded + # world is what to do with all of the threads..." 
+ # So we check for active threads: + if threading.activeCount() != 1: + self.bus.log('There are %r active threads. ' + 'Daemonizing now may cause strange failures.' % + threading.enumerate(), level=30) + + # See http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16 + # (or http://www.faqs.org/faqs/unix-faq/programmer/faq/ section 1.7) + # and http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012 + + # Finish up with the current stdout/stderr + sys.stdout.flush() + sys.stderr.flush() + + # Do first fork. + try: + pid = os.fork() + if pid == 0: + # This is the child process. Continue. + pass + else: + # This is the first parent. Exit, now that we've forked. + self.bus.log('Forking once.') + os._exit(0) + except OSError, exc: + # Python raises OSError rather than returning negative numbers. + sys.exit("%s: fork #1 failed: (%d) %s\n" + % (sys.argv[0], exc.errno, exc.strerror)) + + os.setsid() + + # Do second fork + try: + pid = os.fork() + if pid > 0: + self.bus.log('Forking twice.') + os._exit(0) # Exit second parent + except OSError, exc: + sys.exit("%s: fork #2 failed: (%d) %s\n" + % (sys.argv[0], exc.errno, exc.strerror)) + + os.chdir("/") + os.umask(0) + + si = open(self.stdin, "r") + so = open(self.stdout, "a+") + se = open(self.stderr, "a+", 0) + + # os.dup2(fd, fd2) will close fd2 if necessary, + # so we don't explicitly close stdin/out/err. + # See http://docs.python.org/lib/os-fd-ops.html + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + + self.bus.log('Daemonized to PID: %s' % os.getpid()) + self.finalized = True + start.priority = 65 + + +class PIDFile(SimplePlugin): + """Maintain a PID file via a WSPBus.""" + + def __init__(self, bus, pidfile): + SimplePlugin.__init__(self, bus) + self.pidfile = pidfile + self.finalized = False + + def start(self): + pid = os.getpid() + if self.finalized: + self.bus.log('PID %r already written to %r.' % (pid, self.pidfile)) + else: + open(self.pidfile, "wb").write(str(pid)) + self.bus.log('PID %r written to %r.' % (pid, self.pidfile)) + self.finalized = True + start.priority = 70 + + def exit(self): + try: + os.remove(self.pidfile) + self.bus.log('PID file removed: %r.' % self.pidfile) + except (KeyboardInterrupt, SystemExit): + raise + except: + pass + + +class PerpetualTimer(threading._Timer): + """A subclass of threading._Timer whose run() method repeats.""" + + def run(self): + while True: + self.finished.wait(self.interval) + if self.finished.isSet(): + return + self.function(*self.args, **self.kwargs) + + +class Monitor(SimplePlugin): + """WSPBus listener to periodically run a callback in its own thread. + + bus: a Web Site Process Bus object. + callback: the function to call at intervals. + frequency: the time in seconds between callback runs. + """ + + frequency = 60 + + def __init__(self, bus, callback, frequency=60): + SimplePlugin.__init__(self, bus) + self.callback = callback + self.frequency = frequency + self.thread = None + + def start(self): + """Start our callback in its own perpetual timer thread.""" + if self.frequency > 0: + threadname = self.__class__.__name__ + if self.thread is None: + self.thread = PerpetualTimer(self.frequency, self.callback) + self.thread.setName(threadname) + self.thread.start() + self.bus.log("Started monitor thread %r." % threadname) + else: + self.bus.log("Monitor thread %r already started." 
% threadname) + start.priority = 70 + + def stop(self): + """Stop our callback's perpetual timer thread.""" + if self.thread is None: + self.bus.log("No thread running for %s." % self.__class__.__name__) + else: + if self.thread is not threading.currentThread(): + name = self.thread.getName() + self.thread.cancel() + self.thread.join() + self.bus.log("Stopped thread %r." % name) + self.thread = None + + def graceful(self): + """Stop the callback's perpetual timer thread and restart it.""" + self.stop() + self.start() + + +class Autoreloader(Monitor): + """Monitor which re-executes the process when files change.""" + + frequency = 1 + match = '.*' + + def __init__(self, bus, frequency=1, match='.*'): + self.mtimes = {} + self.files = set() + self.match = match + Monitor.__init__(self, bus, self.run, frequency) + + def start(self): + """Start our own perpetual timer thread for self.run.""" + if self.thread is None: + self.mtimes = {} + Monitor.start(self) + start.priority = 70 + + def run(self): + """Reload the process if registered files have been modified.""" + sysfiles = set() + for k, m in sys.modules.items(): + if re.match(self.match, k): + if hasattr(m, '__loader__'): + if hasattr(m.__loader__, 'archive'): + k = m.__loader__.archive + k = getattr(m, '__file__', None) + sysfiles.add(k) + + for filename in sysfiles | self.files: + if filename: + if filename.endswith('.pyc'): + filename = filename[:-1] + + oldtime = self.mtimes.get(filename, 0) + if oldtime is None: + # Module with no .py file. Skip it. + continue + + try: + mtime = os.stat(filename).st_mtime + except OSError: + # Either a module with no .py file, or it's been deleted. + mtime = None + + if filename not in self.mtimes: + # If a module has no .py file, this will be None. + self.mtimes[filename] = mtime + else: + if mtime is None or mtime > oldtime: + # The file has been deleted or modified. + self.bus.log("Restarting because %s changed." % filename) + self.thread.cancel() + self.bus.log("Stopped thread %r." % self.thread.getName()) + self.bus.restart() + return + + +class ThreadManager(SimplePlugin): + """Manager for HTTP request threads. + + If you have control over thread creation and destruction, publish to + the 'acquire_thread' and 'release_thread' channels (for each thread). + This will register/unregister the current thread and publish to + 'start_thread' and 'stop_thread' listeners in the bus as needed. + + If threads are created and destroyed by code you do not control + (e.g., Apache), then, at the beginning of every HTTP request, + publish to 'acquire_thread' only. You should not publish to + 'release_thread' in this case, since you do not know whether + the thread will be re-used or not. The bus will call + 'stop_thread' listeners for you when it stops. + """ + + def __init__(self, bus): + self.threads = {} + SimplePlugin.__init__(self, bus) + self.bus.listeners.setdefault('acquire_thread', set()) + self.bus.listeners.setdefault('release_thread', set()) + + def acquire_thread(self): + """Run 'start_thread' listeners for the current thread. + + If the current thread has already been seen, any 'start_thread' + listeners will not be run again. + """ + thread_ident = threading._get_ident() + if thread_ident not in self.threads: + # We can't just use _get_ident as the thread ID + # because some platforms reuse thread ID's. 
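# Usage sketch for ThreadManager above, assuming the module layout
# cherrypy.process.plugins / cherrypy.process.wspbus used elsewhere in this
# diff. The listener and the "request handling" step are illustrative.
from cherrypy.process import wspbus
from cherrypy.process.plugins import ThreadManager

bus = wspbus.Bus()
tm = ThreadManager(bus)
bus.subscribe('acquire_thread', tm.acquire_thread)
bus.subscribe('release_thread', tm.release_thread)
bus.subscribe('start_thread', lambda i: bus.log('worker %d registered' % i))

# Inside each request-handling thread:
bus.publish('acquire_thread')    # first call per thread fires 'start_thread'
# ... handle the request ...
bus.publish('release_thread')    # fires 'stop_thread' for this thread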
+ i = len(self.threads) + 1 + self.threads[thread_ident] = i + self.bus.publish('start_thread', i) + + def release_thread(self): + """Release the current thread and run 'stop_thread' listeners.""" + thread_ident = threading._get_ident() + i = self.threads.pop(thread_ident, None) + if i is not None: + self.bus.publish('stop_thread', i) + + def stop(self): + """Release all threads and run all 'stop_thread' listeners.""" + for thread_ident, i in self.threads.iteritems(): + self.bus.publish('stop_thread', i) + self.threads.clear() + graceful = stop + diff --git a/src/cherrypy/process/servers.py b/src/cherrypy/process/servers.py new file mode 100644 index 0000000000..f4baf83318 --- /dev/null +++ b/src/cherrypy/process/servers.py @@ -0,0 +1,224 @@ +"""Adapt an HTTP server.""" + +import time + + +class ServerAdapter(object): + """Adapter for an HTTP server. + + If you need to start more than one HTTP server (to serve on multiple + ports, or protocols, etc.), you can manually register each one and then + start them all with bus.start: + + s1 = ServerAdapter(bus, MyWSGIServer(host='0.0.0.0', port=80)) + s2 = ServerAdapter(bus, another.HTTPServer(host='127.0.0.1', SSL=True)) + s1.subscribe() + s2.subscribe() + bus.start() + """ + + def __init__(self, bus, httpserver=None, bind_addr=None): + self.bus = bus + self.httpserver = httpserver + self.bind_addr = bind_addr + self.interrupt = None + self.running = False + + def subscribe(self): + self.bus.subscribe('start', self.start) + self.bus.subscribe('stop', self.stop) + + def unsubscribe(self): + self.bus.unsubscribe('start', self.start) + self.bus.unsubscribe('stop', self.stop) + + def start(self): + """Start the HTTP server.""" + if isinstance(self.bind_addr, tuple): + host, port = self.bind_addr + on_what = "%s:%s" % (host, port) + else: + on_what = "socket file: %s" % self.bind_addr + + if self.running: + self.bus.log("Already serving on %s" % on_what) + return + + self.interrupt = None + if not self.httpserver: + raise ValueError("No HTTP server has been created.") + + # Start the httpserver in a new thread. + if isinstance(self.bind_addr, tuple): + wait_for_free_port(*self.bind_addr) + + import threading + t = threading.Thread(target=self._start_http_thread) + t.setName("HTTPServer " + t.getName()) + t.start() + + self.wait() + self.running = True + self.bus.log("Serving on %s" % on_what) + start.priority = 75 + + def _start_http_thread(self): + """HTTP servers MUST be running in new threads, so that the + main thread persists to receive KeyboardInterrupt's. If an + exception is raised in the httpserver's thread then it's + trapped here, and the bus (and therefore our httpserver) + are shut down. 
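# Self-contained sketch of the pattern this docstring describes: the blocking
# serve loop runs in a worker thread, leaving the main thread free to receive
# KeyboardInterrupt and order a shutdown. FakeServer is a stand-in, not part
# of the patch.
import threading, time

class FakeServer(object):
    def __init__(self):
        self.ready = False
        self.running = True
    def start(self):                 # blocking serve loop
        self.ready = True
        while self.running:
            time.sleep(0.05)
    def stop(self):
        self.running = False

srv = FakeServer()
t = threading.Thread(target=srv.start)
t.setName("HTTPServer " + t.getName())
t.start()
while not srv.ready:                 # same idea as ServerAdapter.wait()
    time.sleep(0.01)
# ... the main thread is free here to catch Ctrl-C and call srv.stop() ...
srv.stop()
t.join()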
+ """ + try: + self.httpserver.start() + except KeyboardInterrupt, exc: + self.bus.log(" hit: shutting down HTTP server") + self.interrupt = exc + self.bus.exit() + except SystemExit, exc: + self.bus.log("SystemExit raised: shutting down HTTP server") + self.interrupt = exc + self.bus.exit() + raise + except: + import sys + self.interrupt = sys.exc_info()[1] + self.bus.log("Error in HTTP server: shutting down", + traceback=True, level=40) + self.bus.exit() + raise + + def wait(self): + """Wait until the HTTP server is ready to receive requests.""" + while not getattr(self.httpserver, "ready", False): + if self.interrupt: + raise self.interrupt + time.sleep(.1) + + # Wait for port to be occupied + if isinstance(self.bind_addr, tuple): + host, port = self.bind_addr + wait_for_occupied_port(host, port) + + def stop(self): + """Stop the HTTP server.""" + if self.running: + # stop() MUST block until the server is *truly* stopped. + self.httpserver.stop() + # Wait for the socket to be truly freed. + if isinstance(self.bind_addr, tuple): + wait_for_free_port(*self.bind_addr) + self.running = False + self.bus.log("HTTP Server %s shut down" % self.httpserver) + else: + self.bus.log("HTTP Server %s already shut down" % self.httpserver) + stop.priority = 25 + + def restart(self): + """Restart the HTTP server.""" + self.stop() + self.start() + + +class FlupFCGIServer(object): + """Adapter for a flup.server.fcgi.WSGIServer.""" + + def __init__(self, *args, **kwargs): + from flup.server.fcgi import WSGIServer + self.fcgiserver = WSGIServer(*args, **kwargs) + # TODO: report this bug upstream to flup. + # If we don't set _oldSIGs on Windows, we get: + # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", + # line 108, in run + # self._restoreSignalHandlers() + # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", + # line 156, in _restoreSignalHandlers + # for signum,handler in self._oldSIGs: + # AttributeError: 'WSGIServer' object has no attribute '_oldSIGs' + self.fcgiserver._oldSIGs = [] + self.ready = False + + def start(self): + """Start the FCGI server.""" + self.ready = True + self.fcgiserver.run() + + def stop(self): + """Stop the HTTP server.""" + self.ready = False + # Forcibly stop the fcgi server main event loop. + self.fcgiserver._keepGoing = False + # Force all worker threads to die off. + self.fcgiserver._threadPool.maxSpare = 0 + + +def client_host(server_host): + """Return the host on which a client can connect to the given listener.""" + if server_host == '0.0.0.0': + # 0.0.0.0 is INADDR_ANY, which should answer on localhost. + return '127.0.0.1' + if server_host == '::': + # :: is IN6ADDR_ANY, which should answer on localhost. 
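# Usage sketch for the port helpers defined just below (check_port,
# wait_for_free_port, wait_for_occupied_port), assuming this module is
# importable as cherrypy.process.servers. Note that check_port raises
# IOError when the port is already bound and returns quietly when it is free.
from cherrypy.process import servers

try:
    servers.check_port('127.0.0.1', 8080, timeout=0.2)
except IOError:
    print('port 8080 is already in use')
else:
    print('port 8080 is free')

# ServerAdapter above uses the same helpers around restarts:
#   wait_for_free_port(host, port)      before binding the new server
#   wait_for_occupied_port(host, port)  after asking it to start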
+ return '::1' + return server_host + +def check_port(host, port, timeout=1.0): + """Raise an error if the given port is not free on the given host.""" + if not host: + raise ValueError("Host values of '' or None are not allowed.") + host = client_host(host) + port = int(port) + + import socket + + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(timeout) + s.connect((host, port)) + s.close() + raise IOError("Port %s is in use on %s; perhaps the previous " + "httpserver did not shut down properly." % + (repr(port), repr(host))) + except socket.error: + if s: + s.close() + +def wait_for_free_port(host, port): + """Wait for the specified port to become free (drop requests).""" + if not host: + raise ValueError("Host values of '' or None are not allowed.") + + for trial in xrange(50): + try: + # we are expecting a free port, so reduce the timeout + check_port(host, port, timeout=0.1) + except IOError: + # Give the old server thread time to free the port. + time.sleep(0.1) + else: + return + + raise IOError("Port %r not free on %r" % (port, host)) + +def wait_for_occupied_port(host, port): + """Wait for the specified port to become active (receive requests).""" + if not host: + raise ValueError("Host values of '' or None are not allowed.") + + for trial in xrange(50): + try: + check_port(host, port) + except IOError: + return + else: + time.sleep(.1) + + raise IOError("Port %r not bound on %r" % (port, host)) diff --git a/src/cherrypy/process/win32.py b/src/cherrypy/process/win32.py new file mode 100644 index 0000000000..9db928da5a --- /dev/null +++ b/src/cherrypy/process/win32.py @@ -0,0 +1,170 @@ +"""Windows service. 
Requires pywin32.""" + +import os +import thread +import win32api +import win32con +import win32event +import win32service +import win32serviceutil + +from cherrypy.process import wspbus, plugins + + +class ConsoleCtrlHandler(plugins.SimplePlugin): + """A WSPBus plugin for handling Win32 console events (like Ctrl-C).""" + + def __init__(self, bus): + self.is_set = False + plugins.SimplePlugin.__init__(self, bus) + + def start(self): + if self.is_set: + self.bus.log('Handler for console events already set.', level=40) + return + + result = win32api.SetConsoleCtrlHandler(self.handle, 1) + if result == 0: + self.bus.log('Could not SetConsoleCtrlHandler (error %r)' % + win32api.GetLastError(), level=40) + else: + self.bus.log('Set handler for console events.', level=40) + self.is_set = True + + def stop(self): + if not self.is_set: + self.bus.log('Handler for console events already off.', level=40) + return + + try: + result = win32api.SetConsoleCtrlHandler(self.handle, 0) + except ValueError: + # "ValueError: The object has not been registered" + result = 1 + + if result == 0: + self.bus.log('Could not remove SetConsoleCtrlHandler (error %r)' % + win32api.GetLastError(), level=40) + else: + self.bus.log('Removed handler for console events.', level=40) + self.is_set = False + + def handle(self, event): + """Handle console control events (like Ctrl-C).""" + if event in (win32con.CTRL_C_EVENT, win32con.CTRL_LOGOFF_EVENT, + win32con.CTRL_BREAK_EVENT, win32con.CTRL_SHUTDOWN_EVENT, + win32con.CTRL_CLOSE_EVENT): + self.bus.log('Console event %s: shutting down bus' % event) + + # Remove self immediately so repeated Ctrl-C doesn't re-call it. + try: + self.stop() + except ValueError: + pass + + self.bus.exit() + # 'First to return True stops the calls' + return 1 + return 0 + + +class Win32Bus(wspbus.Bus): + """A Web Site Process Bus implementation for Win32. + + Instead of using time.sleep for blocking, this bus uses native + win32event objects. It also responds to console events. + """ + + def __init__(self): + self.events = {} + wspbus.Bus.__init__(self) + + def _get_state_event(self, state): + """Return a win32event for the given state (creating it if needed).""" + try: + return self.events[state] + except KeyError: + event = win32event.CreateEvent(None, 0, 0, + u"WSPBus %s Event (pid=%r)" % + (state.name, os.getpid())) + self.events[state] = event + return event + + def _get_state(self): + return self._state + def _set_state(self, value): + self._state = value + event = self._get_state_event(value) + win32event.PulseEvent(event) + state = property(_get_state, _set_state) + + def wait(self, state, interval=0.1): + """Wait for the given state, KeyboardInterrupt or SystemExit. + + Since this class uses native win32event objects, the interval + argument is ignored. + """ + # Don't wait for an event that beat us to the punch ;) + if self.state != state: + event = self._get_state_event(state) + win32event.WaitForSingleObject(event, win32event.INFINITE) + + +class _ControlCodes(dict): + """Control codes used to "signal" a service via ControlService. + + User-defined control codes are in the range 128-255. We generally use + the standard Python value for the Linux signal and add 128. 
Example: + + >>> signal.SIGUSR1 + 10 + control_codes['graceful'] = 128 + 10 + """ + + def key_for(self, obj): + """For the given value, return its corresponding key.""" + for key, val in self.iteritems(): + if val is obj: + return key + raise ValueError("The given object could not be found: %r" % obj) + +control_codes = _ControlCodes({'graceful': 138}) + + +def signal_child(service, command): + if command == 'stop': + win32serviceutil.StopService(service) + elif command == 'restart': + win32serviceutil.RestartService(service) + else: + win32serviceutil.ControlService(service, control_codes[command]) + + +class PyWebService(win32serviceutil.ServiceFramework): + """Python Web Service.""" + + _svc_name_ = "Python Web Service" + _svc_display_name_ = "Python Web Service" + _svc_deps_ = None # sequence of service names on which this depends + _exe_name_ = "pywebsvc" + _exe_args_ = None # Default to no arguments + + # Only exists on Windows 2000 or later, ignored on windows NT + _svc_description_ = "Python Web Service" + + def SvcDoRun(self): + from cherrypy import process + process.bus.start() + process.bus.block() + + def SvcStop(self): + from cherrypy import process + self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING) + process.bus.exit() + + def SvcOther(self, control): + process.bus.publish(control_codes.key_for(control)) + + +if __name__ == '__main__': + win32serviceutil.HandleCommandLine(PyWebService) diff --git a/src/cherrypy/process/wspbus.py b/src/cherrypy/process/wspbus.py new file mode 100644 index 0000000000..8c0d84bcbc --- /dev/null +++ b/src/cherrypy/process/wspbus.py @@ -0,0 +1,317 @@ +"""An implementation of the Web Site Process Bus. + +This module is completely standalone, depending only on the stdlib. + +Web Site Process Bus +-------------------- + +A Bus object is used to contain and manage site-wide behavior: +daemonization, HTTP server start/stop, process reload, signal handling, +drop privileges, PID file management, logging for all of these, +and many more. + +In addition, a Bus object provides a place for each web framework +to register code that runs in response to site-wide events (like +process start and stop), or which controls or otherwise interacts with +the site-wide components mentioned above. For example, a framework which +uses file-based templates would add known template filenames to an +autoreload component. + +Ideally, a Bus object will be flexible enough to be useful in a variety +of invocation scenarios: + + 1. The deployer starts a site from the command line via a framework- + neutral deployment script; applications from multiple frameworks + are mixed in a single site. Command-line arguments and configuration + files are used to define site-wide components such as the HTTP server, + WSGI component graph, autoreload behavior, signal handling, etc. + 2. The deployer starts a site via some other process, such as Apache; + applications from multiple frameworks are mixed in a single site. + Autoreload and signal handling (from Python at least) are disabled. + 3. The deployer starts a site via a framework-specific mechanism; + for example, when running tests, exploring tutorials, or deploying + single applications from a single framework. The framework controls + which site-wide components are enabled as it sees fit. + +The Bus object in this package uses topic-based publish-subscribe +messaging to accomplish all this. A few topic channels are built in +('start', 'stop', 'exit', and 'graceful'). 
Frameworks and site containers +are free to define their own. If a message is sent to a channel that has +not been defined or has no listeners, there is no effect. + +In general, there should only ever be a single Bus object per process. +Frameworks and site containers share a single Bus object by publishing +messages and subscribing listeners. + +The Bus object works as a finite state machine which models the current +state of the process. Bus methods move it from one state to another; +those methods then publish to subscribed listeners on the channel for +the new state. + + O + | + V + STOPPING --> STOPPED --> EXITING -> X + A A | + | \___ | + | \ | + | V V + STARTED <-- STARTING + +""" + +import atexit +import os +try: + set +except NameError: + from sets import Set as set +import sys +import threading +import time +import traceback as _traceback +import warnings + + +# Use a flag to indicate the state of the bus. +class _StateEnum(object): + class State(object): + name = None + def __repr__(self): + return "states.%s" % self.name + + def __setattr__(self, key, value): + if isinstance(value, self.State): + value.name = key + object.__setattr__(self, key, value) +states = _StateEnum() +states.STOPPED = states.State() +states.STARTING = states.State() +states.STARTED = states.State() +states.STOPPING = states.State() +states.EXITING = states.State() + + +class Bus(object): + """Process state-machine and messenger for HTTP site deployment. + + All listeners for a given channel are guaranteed to be called even + if others at the same channel fail. Each failure is logged, but + execution proceeds on to the next listener. The only way to stop all + processing from inside a listener is to raise SystemExit and stop the + whole server. + """ + + states = states + state = states.STOPPED + execv = False + + def __init__(self): + self.execv = False + self.state = states.STOPPED + self.listeners = dict( + [(channel, set()) for channel + in ('start', 'stop', 'exit', 'graceful', 'log')]) + self._priorities = {} + + def subscribe(self, channel, callback, priority=None): + """Add the given callback at the given channel (if not present).""" + if channel not in self.listeners: + self.listeners[channel] = set() + self.listeners[channel].add(callback) + + if priority is None: + priority = getattr(callback, 'priority', 50) + self._priorities[(channel, callback)] = priority + + def unsubscribe(self, channel, callback): + """Discard the given callback (if present).""" + listeners = self.listeners.get(channel) + if listeners and callback in listeners: + listeners.discard(callback) + del self._priorities[(channel, callback)] + + def publish(self, channel, *args, **kwargs): + """Return output of all subscribers for the given channel.""" + if channel not in self.listeners: + return [] + + exc = None + output = [] + + items = [(self._priorities[(channel, listener)], listener) + for listener in self.listeners[channel]] + items.sort() + for priority, listener in items: + try: + output.append(listener(*args, **kwargs)) + except KeyboardInterrupt: + raise + except SystemExit, e: + # If we have previous errors ensure the exit code is non-zero + if exc and e.code == 0: + e.code = 1 + raise + except: + self.log("Error in %r listener %r" % (channel, listener), + level=40, traceback=True) + exc = sys.exc_info()[1] + if exc: + raise + return output + + def _clean_exit(self): + """An atexit handler which asserts the Bus is not running.""" + if self.state != states.EXITING: + warnings.warn( + "The main thread is exiting, but 
the Bus is in the %r state; " + "shutting it down automatically now. You must either call " + "bus.block() after start(), or call bus.exit() before the " + "main thread exits." % self.state, RuntimeWarning) + self.exit() + + def start(self): + """Start all services.""" + atexit.register(self._clean_exit) + + self.state = states.STARTING + self.log('Bus STARTING') + try: + self.publish('start') + self.state = states.STARTED + self.log('Bus STARTED') + except (KeyboardInterrupt, SystemExit): + raise + except: + self.log("Shutting down due to error in start listener:", + level=40, traceback=True) + e_info = sys.exc_info() + try: + self.exit() + except: + # Any stop/exit errors will be logged inside publish(). + pass + raise e_info[0], e_info[1], e_info[2] + + def exit(self): + """Stop all services and prepare to exit the process.""" + self.stop() + + self.state = states.EXITING + self.log('Bus EXITING') + self.publish('exit') + # This isn't strictly necessary, but it's better than seeing + # "Waiting for child threads to terminate..." and then nothing. + self.log('Bus EXITED') + + def restart(self): + """Restart the process (may close connections). + + This method does not restart the process from the calling thread; + instead, it stops the bus and asks the main thread to call execv. + """ + self.execv = True + self.exit() + + def graceful(self): + """Advise all services to reload.""" + self.log('Bus graceful') + self.publish('graceful') + + def block(self, interval=0.1): + """Wait for the EXITING state, KeyboardInterrupt or SystemExit.""" + try: + self.wait(states.EXITING, interval=interval) + except (KeyboardInterrupt, IOError): + # The time.sleep call might raise + # "IOError: [Errno 4] Interrupted function call" on KBInt. + self.log('Keyboard Interrupt: shutting down bus') + self.exit() + except SystemExit: + self.log('SystemExit raised: shutting down bus') + self.exit() + raise + + # Waiting for ALL child threads to finish is necessary on OS X. + # See http://www.cherrypy.org/ticket/581. + # It's also good to let them all shut down before allowing + # the main thread to call atexit handlers. + # See http://www.cherrypy.org/ticket/751. + self.log("Waiting for child threads to terminate...") + for t in threading.enumerate(): + if (t != threading.currentThread() and t.isAlive() + # Note that any dummy (external) threads are always daemonic. + and not t.isDaemon()): + t.join() + + if self.execv: + self._do_execv() + + def wait(self, state, interval=0.1): + """Wait for the given state.""" + def _wait(): + while self.state != state: + time.sleep(interval) + + # From http://psyco.sourceforge.net/psycoguide/bugs.html: + # "The compiled machine code does not include the regular polling + # done by Python, meaning that a KeyboardInterrupt will not be + # detected before execution comes back to the regular Python + # interpreter. Your program cannot be interrupted if caught + # into an infinite Psyco-compiled loop." + try: + sys.modules['psyco'].cannotcompile(_wait) + except (KeyError, AttributeError): + pass + + _wait() + + def _do_execv(self): + """Re-execute the current process. + + This must be called from the main thread, because certain platforms + (OS X) don't allow execv to be called in a child thread very well. 
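# Minimal publish/subscribe round trip on the Bus defined above, assuming
# this module is importable as cherrypy.process.wspbus (win32.py earlier in
# this diff imports it that way). Channel and listener names are illustrative.
from cherrypy.process import wspbus

bus = wspbus.Bus()
seen = []

def on_start():
    seen.append('started')

bus.subscribe('start', on_start)
bus.subscribe('my-channel', lambda x: seen.append(x))  # ad-hoc channels are fine

bus.start()                     # publishes 'start', so on_start runs
bus.publish('my-channel', 42)   # returns the listeners' outputs: [None]
bus.exit()                      # 'stop' then 'exit'; final state is EXITING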
+ """ + args = sys.argv[:] + self.log('Re-spawning %s' % ' '.join(args)) + args.insert(0, sys.executable) + if sys.platform == 'win32': + args = ['"%s"' % arg for arg in args] + + os.execv(sys.executable, args) + + def stop(self): + """Stop all services.""" + self.state = states.STOPPING + self.log('Bus STOPPING') + self.publish('stop') + self.state = states.STOPPED + self.log('Bus STOPPED') + + def start_with_callback(self, func, args=None, kwargs=None): + """Start 'func' in a new thread T, then start self (and return T).""" + if args is None: + args = () + if kwargs is None: + kwargs = {} + args = (func,) + args + + def _callback(func, *a, **kw): + self.wait(states.STARTED) + func(*a, **kw) + t = threading.Thread(target=_callback, args=args, kwargs=kwargs) + t.setName('Bus Callback ' + t.getName()) + t.start() + + self.start() + + return t + + def log(self, msg="", level=20, traceback=False): + """Log the given message. Append the last traceback if requested.""" + if traceback: + exc = sys.exc_info() + msg += "\n" + "".join(_traceback.format_exception(*exc)) + self.publish('log', msg, level) + +bus = Bus() diff --git a/src/cherrypy/scaffold/example.conf b/src/cherrypy/scaffold/example.conf new file mode 100644 index 0000000000..93a6e53c05 --- /dev/null +++ b/src/cherrypy/scaffold/example.conf @@ -0,0 +1,3 @@ +[/] +log.error_file: "error.log" +log.access_file: "access.log" \ No newline at end of file diff --git a/src/cherrypy/scaffold/site.conf b/src/cherrypy/scaffold/site.conf new file mode 100644 index 0000000000..6fc8f4ec34 --- /dev/null +++ b/src/cherrypy/scaffold/site.conf @@ -0,0 +1,8 @@ +[global] +# Uncomment this when you're done developing +#environment: "production" + +server.socket_host: "0.0.0.0" +server.socket_port: 8088 + +tree.myapp: cherrypy.Application(scaffold.root, "/", "cherrypy/scaffold/example.conf") diff --git a/src/cherrypy/scaffold/static/made_with_cherrypy_small.png b/src/cherrypy/scaffold/static/made_with_cherrypy_small.png new file mode 100644 index 0000000000..c3aafeed95 Binary files /dev/null and b/src/cherrypy/scaffold/static/made_with_cherrypy_small.png differ diff --git a/src/cherrypy/wsgiserver/__init__.py b/src/cherrypy/wsgiserver/__init__.py new file mode 100644 index 0000000000..c3172c73ff --- /dev/null +++ b/src/cherrypy/wsgiserver/__init__.py @@ -0,0 +1,1560 @@ +"""A high-speed, production ready, thread pooled, generic WSGI server. + +Simplest example on how to use this module directly +(without using CherryPy's application machinery): + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!\n'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set these attributes: + + server.ssl_certificate = + server.ssl_private_key = + + if __name__ == '__main__': + try: + server.start() + except KeyboardInterrupt: + server.stop() + +This won't call the CherryPy engine (application side) at all, only the +WSGI server, which is independant from the rest of CherryPy. 
Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" + req.rfile.readline() + req.read_headers() + req.respond() + -> response = wsgi_app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + + +import base64 +import os +import Queue +import re +quoted_slash = re.compile("(?i)%2F") +import rfc822 +import socket +try: + import cStringIO as StringIO +except ImportError: + import StringIO +import sys +import threading +import time +import traceback +from urllib import unquote +from urlparse import urlparse +import warnings + +try: + from OpenSSL import SSL + from OpenSSL import crypto +except ImportError: + SSL = None + +import errno + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. + """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return dict.fromkeys(nums).keys() + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", + ) +socket_errors_to_ignore.append("timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = ['ACCEPT', 'ACCEPT-CHARSET', 'ACCEPT-ENCODING', + 'ACCEPT-LANGUAGE', 'ACCEPT-RANGES', 'ALLOW', 'CACHE-CONTROL', + 'CONNECTION', 'CONTENT-ENCODING', 'CONTENT-LANGUAGE', 'EXPECT', + 'IF-MATCH', 'IF-NONE-MATCH', 'PRAGMA', 'PROXY-AUTHENTICATE', 'TE', + 'TRAILER', 'TRANSFER-ENCODING', 'UPGRADE', 'VARY', 'VIA', 'WARNING', + 'WWW-AUTHENTICATE'] + + +class WSGIPathInfoDispatcher(object): + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = apps.items() + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort() + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". 
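# Worked example of the dispatch rule implemented here: the longest matching
# mount prefix is moved from PATH_INFO onto SCRIPT_NAME. It assumes this
# module is importable as cherrypy.wsgiserver (as the module docstring above
# suggests); the two WSGI apps are stand-ins.
from cherrypy.wsgiserver import WSGIPathInfoDispatcher

def root_app(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return ['root got %s\n' % environ['PATH_INFO']]

def blog_app(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return ['blog got %s\n' % environ['PATH_INFO']]

d = WSGIPathInfoDispatcher({'/': root_app, '/blog': blog_app})
# A request for /blog/2008/10 is routed to blog_app with
#   SCRIPT_NAME == '/blog' and PATH_INFO == '/2008/10';
# any other path falls through to root_app, mounted at ''.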
+ self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] + + +class MaxSizeExceeded(Exception): + pass + +class SizeCheckWrapper(object): + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < 256 or data[-1:] == "\n": + return ''.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class HTTPRequest(object): + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. + + send: the 'send' method from the connection's socket object. + wsgi_app: the WSGI application to call. + environ: a partial WSGI environ (server and connection entries). + The caller MUST set the following entries: + * All wsgi.* entries, including .input + * SERVER_NAME and SERVER_PORT + * Any SSL_* entries + * Any custom entries like REMOTE_ADDR and REMOTE_PORT + * SERVER_SOFTWARE: the value to write in the "Server" response header. + * ACTUAL_SERVER_PROTOCOL: the value to write in the Status-Line of + the response. From RFC 2145: "An HTTP server SHOULD send a + response version equal to the highest version for which the + server is at least conditionally compliant, and whose major + version is less than or equal to the one received in the + request. An HTTP server MUST NOT send a version for which + it is not at least conditionally compliant." + + outheaders: a list of header tuples to write in the response. + ready: when True, the request has been parsed and is ready to begin + generating the response. When False, signals the calling Connection + that the response should not be generated and the connection should + close. + close_connection: signals the calling Connection that the request + should close. This does not imply an error! The client and/or + server may each request that the connection be closed. 
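# Self-contained illustration of SizeCheckWrapper above: reads are counted,
# and MaxSizeExceeded is raised once the running total passes maxlen (the
# caller then answers with "413 Request Entity Too Large"). StringIO stands
# in for the socket fileobject; the import path is assumed.
try:
    import cStringIO as StringIO
except ImportError:
    import StringIO

from cherrypy.wsgiserver import SizeCheckWrapper, MaxSizeExceeded

rfile = SizeCheckWrapper(StringIO.StringIO('x' * 100), maxlen=64)
try:
    rfile.read(80)
except MaxSizeExceeded:
    pass   # 80 bytes read > 64 allowed: parse_request() would send a 413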
+ chunked_write: if True, output will be encoded with the "chunked" + transfer-coding. This value is set automatically inside + send_headers. + """ + + max_request_header_size = 0 + max_request_body_size = 0 + + def __init__(self, wfile, environ, wsgi_app): + self.rfile = environ['wsgi.input'] + self.wfile = wfile + self.environ = environ.copy() + self.wsgi_app = wsgi_app + + self.ready = False + self.started_response = False + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = False + self.chunked_write = False + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile.maxlen = self.max_request_header_size + self.rfile.bytes_read = 0 + + try: + self._parse_request() + except MaxSizeExceeded: + self.simple_response("413 Request Entity Too Large") + return + + def _parse_request(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + if not request_line: + # Force self.ready = False so the connection will close. + self.ready = False + return + + if request_line == "\r\n": + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. + request_line = self.rfile.readline() + if not request_line: + self.ready = False + return + + environ = self.environ + + method, path, req_protocol = request_line.strip().split(" ", 2) + environ["REQUEST_METHOD"] = method + + # path may be an abs_path (including "http://host.domain.tld"); + scheme, location, path, params, qs, frag = urlparse(path) + + if frag: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return + + if scheme: + environ["wsgi.url_scheme"] = scheme + if params: + path = path + ";" + params + + environ["SCRIPT_NAME"] = "" + + # Unquote the path+params (e.g. "/this%20path" -> "this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + atoms = [unquote(x) for x in quoted_slash.split(path)] + path = "%2F".join(atoms) + environ["PATH_INFO"] = path + + # Note that, like wsgiref and most other WSGI servers, + # we unquote the path but not the query string. + environ["QUERY_STRING"] = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. 
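# A standalone restatement of the version negotiation described in the
# comment above: parse "HTTP/x.y" into a (major, minor) tuple and answer
# with the lower of the request and server versions, or 505 if the majors
# differ.
def negotiate(req_protocol, server_protocol):
    rp = int(req_protocol[5]), int(req_protocol[7])
    sp = int(server_protocol[5]), int(server_protocol[7])
    if rp[0] != sp[0]:
        return None   # caller sends "505 HTTP Version Not Supported"
    return 'HTTP/%s.%s' % min(rp, sp)

# negotiate('HTTP/1.0', 'HTTP/1.1') == 'HTTP/1.0'   (row b of the table above)
# negotiate('HTTP/1.1', 'HTTP/1.1') == 'HTTP/1.1'   (row d)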
+ rp = int(req_protocol[5]), int(req_protocol[7]) + server_protocol = environ["ACTUAL_SERVER_PROTOCOL"] + sp = int(server_protocol[5]), int(server_protocol[7]) + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + environ["SERVER_PROTOCOL"] = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + + # If the Request-URI was an absoluteURI, use its location atom. + if location: + environ["SERVER_NAME"] = location + + # then all the http headers + try: + self.read_headers() + except ValueError, ex: + self.simple_response("400 Bad Request", repr(ex.args)) + return + + mrbs = self.max_request_body_size + if mrbs and int(environ.get("CONTENT_LENGTH", 0)) > mrbs: + self.simple_response("413 Request Entity Too Large") + return + + # Set AUTH_TYPE, REMOTE_USER + creds = environ.get("HTTP_AUTHORIZATION", "").split(" ", 1) + environ["AUTH_TYPE"] = creds[0] + if creds[0].lower() == 'basic': + user, pw = base64.decodestring(creds[1]).split(":", 1) + environ["REMOTE_USER"] = user + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if environ.get("HTTP_CONNECTION", "") == "close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if environ.get("HTTP_CONNECTION", "") != "Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = environ.get("HTTP_TRANSFER_ENCODING") + if te: + te = [x.strip().lower() for x in te.split(",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == "chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if environ.get("HTTP_EXPECT", "") == "100-continue": + self.simple_response(100) + + self.ready = True + + def read_headers(self): + """Read header lines from the incoming stream.""" + environ = self.environ + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == '\r\n': + # Normal end of headers + break + + if line[0] in ' \t': + # It's a continuation line. 
+ v = line.strip() + else: + k, v = line.split(":", 1) + k, v = k.strip().upper(), v.strip() + envname = "HTTP_" + k.replace("-", "_") + + if k in comma_separated_headers: + existing = environ.get(envname) + if existing: + v = ", ".join((existing, v)) + environ[envname] = v + + ct = environ.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + environ["CONTENT_TYPE"] = ct + cl = environ.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + environ["CONTENT_LENGTH"] = cl + + def decode_chunked(self): + """Decode the 'chunked' transfer coding.""" + cl = 0 + data = StringIO.StringIO() + while True: + line = self.rfile.readline().strip().split(";", 1) + chunk_size = int(line.pop(0), 16) + if chunk_size <= 0: + break +## if line: chunk_extension = line[0] + cl += chunk_size + data.write(self.rfile.read(chunk_size)) + crlf = self.rfile.read(2) + if crlf != "\r\n": + self.simple_response("400 Bad Request", + "Bad chunked transfer coding " + "(expected '\\r\\n', got %r)" % crlf) + return + + # Grab any trailer headers + self.read_headers() + + data.seek(0) + self.environ["wsgi.input"] = data + self.environ["CONTENT_LENGTH"] = str(cl) or "" + return True + + def respond(self): + """Call the appropriate WSGI app and write its iterable output.""" + # Set rfile.maxlen to ensure we don't read past Content-Length. + # This will also be used to read the entire request body if errors + # are raised before the app can read the body. + if self.chunked_read: + # If chunked, Content-Length will be 0. + self.rfile.maxlen = self.max_request_body_size + else: + cl = int(self.environ.get("CONTENT_LENGTH", 0)) + if self.max_request_body_size: + self.rfile.maxlen = min(cl, self.max_request_body_size) + else: + self.rfile.maxlen = cl + self.rfile.bytes_read = 0 + + try: + self._respond() + except MaxSizeExceeded: + if not self.sent_headers: + self.simple_response("413 Request Entity Too Large") + return + + def _respond(self): + if self.chunked_read: + if not self.decode_chunked(): + self.close_connection = True + return + + response = self.wsgi_app(self.environ, self.start_response) + try: + for chunk in response: + # "The start_response callable must not actually transmit + # the response headers. Instead, it must store them for the + # server or gateway to transmit only after the first + # iteration of the application return value that yields + # a NON-EMPTY string, or upon the application's first + # invocation of the write() callable." (PEP 333) + if chunk: + self.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.wfile.sendall("0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = ["%s %s\r\n" % (self.environ['ACTUAL_SERVER_PROTOCOL'], status), + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n"] + + if status[:3] == "413" and self.response_protocol == 'HTTP/1.1': + # Request Entity Too Large + self.close_connection = True + buf.append("Connection: close\r\n") + + buf.append("\r\n") + if msg: + buf.append(msg) + self.wfile.sendall("".join(buf)) + + def start_response(self, status, headers, exc_info = None): + """WSGI callable to begin the HTTP response.""" + # "The application may call start_response more than once, + # if and only if the exc_info argument is provided." 
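# Standalone illustration of the wire format that decode_chunked() earlier
# in this class consumes: a hex chunk-size line, the chunk data, CRLF,
# repeated, terminated by a zero-size chunk and (possibly empty) trailer
# headers. The sample body is illustrative.
chunked_body = (
    "5\r\n"          # chunk of 5 bytes
    "hello\r\n"
    "6\r\n"          # chunk of 6 bytes
    " world\r\n"
    "0\r\n"          # last-chunk
    "\r\n"           # end of (empty) trailers
)
# After decoding, the request sees wsgi.input == "hello world" and
# CONTENT_LENGTH == "11".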
+ if self.started_response and not exc_info: + raise AssertionError("WSGI start_response called a second " + "time with no exc_info.") + + # "if exc_info is provided, and the HTTP headers have already been + # sent, start_response must raise an error, and should raise the + # exc_info tuple." + if self.sent_headers: + try: + raise exc_info[0], exc_info[1], exc_info[2] + finally: + exc_info = None + + self.started_response = True + self.status = status + self.outheaders.extend(headers) + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). + """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + if not self.sent_headers: + self.sent_headers = True + self.send_headers() + + if self.chunked_write and chunk: + buf = [hex(len(chunk))[2:], "\r\n", chunk, "\r\n"] + self.wfile.sendall("".join(buf)) + else: + self.wfile.sendall(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers.""" + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif "content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. + if status < 200 or status in (204, 205, 304): + pass + else: + if self.response_protocol == 'HTTP/1.1': + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append(("Transfer-Encoding", "chunked")) + else: + # Closing the conn is the only way to determine len. + self.close_connection = True + + if "connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append(("Connection", "close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append(("Connection", "Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." 
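# Standalone illustration of the chunked transfer-coding that write() above
# emits when chunked_write is set: each chunk is framed as
# hex(len(chunk)) CRLF chunk CRLF, and _respond() finishes with "0\r\n\r\n".
def frame_chunk(chunk):
    return hex(len(chunk))[2:] + "\r\n" + chunk + "\r\n"

assert frame_chunk("hello world") == "b\r\nhello world\r\n"   # 11 bytes -> 0xb
# A response body of two chunks then looks like:
#   frame_chunk("hello ") + frame_chunk("world") + "0\r\n\r\n"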
+ size = self.rfile.maxlen - self.rfile.bytes_read + if size > 0: + self.rfile.read(size) + + if "date" not in hkeys: + self.outheaders.append(("Date", rfc822.formatdate())) + + if "server" not in hkeys: + self.outheaders.append(("Server", self.environ['SERVER_SOFTWARE'])) + + buf = [self.environ['ACTUAL_SERVER_PROTOCOL'], " ", self.status, "\r\n"] + try: + buf += [k + ": " + v + "\r\n" for k, v in self.outheaders] + except TypeError: + if not isinstance(k, str): + raise TypeError("WSGI response header key %r is not a string.") + if not isinstance(v, str): + raise TypeError("WSGI response header value %r is not a string.") + else: + raise + buf.append("\r\n") + self.wfile.sendall("".join(buf)) + + +class NoSSLError(Exception): + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_fileobject(socket._fileobject): + """Faux file object attached to a socket object.""" + + def sendall(self, data): + """Sendall for non-blocking sockets.""" + while data: + try: + bytes_sent = self.send(data) + data = data[bytes_sent:] + except socket.error, e: + if e.args[0] not in socket_errors_nonblocking: + raise + + def send(self, data): + return self._sock.send(data) + + def flush(self): + if self._wbuf: + buffer = "".join(self._wbuf) + self._wbuf = [] + self.sendall(buffer) + + def recv(self, size): + while True: + try: + return self._sock.recv(size) + except socket.error, e: + if e.args[0] not in socket_errors_nonblocking: + raise + + def read(self, size=-1): + if size < 0: + # Read until EOF + buffers = [self._rbuf] + self._rbuf = "" + if self._rbufsize <= 1: + recv_size = self.default_bufsize + else: + recv_size = self._rbufsize + + while True: + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + return "".join(buffers) + else: + # Read until size bytes or EOF seen, whichever comes first + data = self._rbuf + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + left = size - buf_len + recv_size = max(self._rbufsize, left) + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + def readline(self, size=-1): + data = self._rbuf + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + assert data == "" + buffers = [] + while data != "\n": + data = self.recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + return "".join(buffers) + else: + # Read until size bytes or \n or EOF seen, whichever comes first + nl = data.find('\n', 0, size) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not 
data: + break + buffers.append(data) + left = size - buf_len + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + +class SSL_fileobject(CP_fileobject): + """SSL file object attached to a socket object.""" + + ssl_timeout = 3 + ssl_retry = .01 + + def _safe_call(self, is_reader, call, *args, **kwargs): + """Wrap the given call with SSL error-trapping. + + is_reader: if False EOF errors will be raised. If True, EOF errors + will return "" (to emulate normal sockets). + """ + start = time.time() + while True: + try: + return call(*args, **kwargs) + except SSL.WantReadError: + # Sleep and try again. This is dangerous, because it means + # the rest of the stack has no way of differentiating + # between a "new handshake" error and "client dropped". + # Note this isn't an endless loop: there's a timeout below. + time.sleep(self.ssl_retry) + except SSL.WantWriteError: + time.sleep(self.ssl_retry) + except SSL.SysCallError, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + errnum = e.args[0] + if is_reader and errnum in socket_errors_to_ignore: + return "" + raise socket.error(errnum) + except SSL.Error, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + thirdarg = None + try: + thirdarg = e.args[0][0][2] + except IndexError: + pass + + if thirdarg == 'http request': + # The client is talking HTTP to an HTTPS server. + raise NoSSLError() + raise FatalSSLAlert(*e.args) + except: + raise + + if time.time() - start > self.ssl_timeout: + raise socket.timeout("timed out") + + def recv(self, *args, **kwargs): + buf = [] + r = super(SSL_fileobject, self).recv + while True: + data = self._safe_call(True, r, *args, **kwargs) + buf.append(data) + p = self._sock.pending() + if not p: + return "".join(buf) + + def sendall(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).sendall, *args, **kwargs) + + def send(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).send, *args, **kwargs) + + +class HTTPConnection(object): + """An HTTP connection (active socket). + + socket: the raw socket object (usually TCP) for this connection. + wsgi_app: the WSGI application for this server/connection. + environ: a WSGI environ template. This will be copied for each request. + + rfile: a fileobject for reading from the socket. + send: a function for writing (+ flush) to the socket. + """ + + rbufsize = -1 + RequestHandlerClass = HTTPRequest + environ = {"wsgi.version": (1, 0), + "wsgi.url_scheme": "http", + "wsgi.multithread": True, + "wsgi.multiprocess": False, + "wsgi.run_once": False, + "wsgi.errors": sys.stderr, + } + + def __init__(self, sock, wsgi_app, environ): + self.socket = sock + self.wsgi_app = wsgi_app + + # Copy the class environ into self. + self.environ = self.environ.copy() + self.environ.update(environ) + + if SSL and isinstance(sock, SSL.ConnectionType): + timeout = sock.gettimeout() + self.rfile = SSL_fileobject(sock, "rb", self.rbufsize) + self.rfile.ssl_timeout = timeout + self.wfile = SSL_fileobject(sock, "wb", -1) + self.wfile.ssl_timeout = timeout + else: + self.rfile = CP_fileobject(sock, "rb", self.rbufsize) + self.wfile = CP_fileobject(sock, "wb", -1) + + # Wrap wsgi.input but not HTTPConnection.rfile itself. 
+ # We're also not setting maxlen yet; we'll do that separately + # for headers and body for each iteration of self.communicate + # (if maxlen is 0 the wrapper doesn't check length). + self.environ["wsgi.input"] = SizeCheckWrapper(self.rfile, 0) + + def communicate(self): + """Read each request and respond appropriately.""" + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.wfile, self.environ, + self.wsgi_app) + + # This order of operations should guarantee correct pipelining. + req.parse_request() + if not req.ready: + return + + req.respond() + if req.close_connection: + return + + except socket.error, e: + errnum = e.args[0] + if errnum == 'timed out': + if req and not req.sent_headers: + req.simple_response("408 Request Timeout") + elif errnum not in socket_errors_to_ignore: + if req and not req.sent_headers: + req.simple_response("500 Internal Server Error", + format_exc()) + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert, e: + # Close the connection. + return + except NoSSLError: + # Unwrap our wfile + req.wfile = CP_fileobject(self.socket, "wb", -1) + if req and not req.sent_headers: + req.simple_response("400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + except Exception, e: + if req and not req.sent_headers: + req.simple_response("500 Internal Server Error", format_exc()) + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + # Python's socket module does NOT call close on the kernel socket + # when you call socket.close(). We do so manually here because we + # want this server to send a FIN TCP segment immediately. Note this + # must be called *before* calling socket.close(), because the latter + # drops its reference to the kernel socket. + self.socket._sock.close() + + self.socket.close() + + +def format_exc(limit=None): + """Like print_exc() but return a string. Backport for Python 2.3.""" + try: + etype, value, tb = sys.exc_info() + return ''.join(traceback.format_exception(etype, value, tb, limit)) + finally: + etype = value = tb = None + + +_SHUTDOWNREQUEST = None + +class WorkerThread(threading.Thread): + """Thread which continuously polls a Queue for Connection objects. + + server: the HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it. + ready: a simple flag for the calling server to know when this thread + has begun polling the Queue. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). + """ + + conn = None + + def __init__(self, server): + self.ready = False + self.server = server + threading.Thread.__init__(self) + + def run(self): + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + try: + conn.communicate() + finally: + conn.close() + self.conn = None + except (KeyboardInterrupt, SystemExit), exc: + self.server.interrupt = exc + + +class ThreadPool(object): + """A Request Queue for the CherryPyWSGIServer which pools threads. 
+ + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = Queue.Queue() + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in xrange(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP WSGIServer " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + for i in xrange(amount): + if self.max > 0 and len(self._threads) >= self.max: + break + worker = WorkerThread(self.server) + worker.setName("CP WSGIServer " + worker.getName()) + self._threads.append(worker) + worker.start() + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. + # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + if amount > 0: + for i in xrange(min(amount, len(self._threads) - self.min)): + # Put a number of shutdown requests on the queue equal + # to 'amount'. Once each of those is processed by a worker, + # that worker will terminate and be culled from our list + # in self.put. + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + worker.join(timeout) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + if SSL and isinstance(c.socket, SSL.ConnectionType): + # pyOpenSSL.socket.shutdown takes no args + c.socket.shutdown() + else: + c.socket.shutdown(socket.SHUT_RD) + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See http://www.cherrypy.org/ticket/691. + KeyboardInterrupt), exc1: + pass + + + +class SSLConnection: + """A thread-safe wrapper for an SSL.Connection. + + *args: the arguments to create the wrapped SSL.Connection(*args). 
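+
+    Each method delegated below is generated via exec and acquires a shared
+    threading.RLock before calling into the wrapped SSL.Connection, so worker
+    threads serialize their access to the underlying connection.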
+ """ + + def __init__(self, *args): + self._ssl_conn = SSL.Connection(*args) + self._lock = threading.RLock() + + for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', + 'renegotiate', 'bind', 'listen', 'connect', 'accept', + 'setblocking', 'fileno', 'shutdown', 'close', 'get_cipher_list', + 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', + 'makefile', 'get_app_data', 'set_app_data', 'state_string', + 'sock_shutdown', 'get_peer_certificate', 'want_read', + 'want_write', 'set_connect_state', 'set_accept_state', + 'connect_ex', 'sendall', 'settimeout'): + exec """def %s(self, *args): + self._lock.acquire() + try: + return self._ssl_conn.%s(*args) + finally: + self._lock.release() +""" % (f, f) + + + +class CherryPyWSGIServer(object): + """An HTTP server for WSGI. + + bind_addr: The interface on which to listen for connections. + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. + + For UNIX sockets, supply the filename as a string. + wsgi_app: the WSGI 'application callable'; multiple WSGI applications + may be passed as (path_prefix, app) pairs. + numthreads: the number of worker threads to create (default 10). + server_name: the string to set for WSGI's SERVER_NAME environ entry. + Defaults to socket.gethostname(). + max: the maximum number of queued requests (defaults to -1 = no limit). + request_queue_size: the 'backlog' argument to socket.listen(); + specifies the maximum number of queued connections (default 5). + timeout: the timeout in seconds for accepted connections (default 10). + + nodelay: if True (the default since 3.1), sets the TCP_NODELAY socket + option. + + protocol: the version string to write in the Status-Line of all + HTTP responses. For example, "HTTP/1.1" (the default). This + also limits the supported features used in the response. + + + SSL/HTTPS + --------- + The OpenSSL module must be importable for SSL functionality. + You can obtain it from http://pyopenssl.sourceforge.net/ + + ssl_certificate: the filename of the server SSL certificate. + ssl_privatekey: the filename of the server's private key file. + + If either of these is None (both are None by default), this server + will not use SSL. If both are given and are valid, they will be read + on server start and used in the SSL context for the listening socket. + """ + + protocol = "HTTP/1.1" + _bind_addr = "127.0.0.1" + version = "CherryPy/3.1.0" + ready = False + _interrupt = None + + nodelay = True + + ConnectionClass = HTTPConnection + environ = {} + + # Paths to certificate and private key files + ssl_certificate = None + ssl_private_key = None + + def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, + max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): + self.requests = ThreadPool(self, min=numthreads or 1, max=max) + + if callable(wsgi_app): + # We've been handed a single wsgi_app, in CP-2.1 style. + # Assume it's mounted at "". + self.wsgi_app = wsgi_app + else: + # We've been handed a list of (path_prefix, wsgi_app) tuples, + # so that the server can call different wsgi_apps, and also + # correctly set SCRIPT_NAME. + warnings.warn("The ability to pass multiple apps is deprecated " + "and will be removed in 3.2. 
You should explicitly " + "include a WSGIPathInfoDispatcher instead.", + DeprecationWarning) + self.wsgi_app = WSGIPathInfoDispatcher(wsgi_app) + + self.bind_addr = bind_addr + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.request_queue_size = request_queue_size + + self.timeout = timeout + self.shutdown_timeout = shutdown_timeout + + def _get_numthreads(self): + return self.requests.min + def _set_numthreads(self, value): + self.requests.min = value + numthreads = property(_get_numthreads, _set_numthreads) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property(_get_bind_addr, _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. + + For UNIX sockets, supply the filename as a string.""") + + def start(self): + """Run the server forever.""" + # We don't have to trap KeyboardInterrupt or SystemExit here, + # because cherrpy.server already does so, calling self.stop() for us. + # If you're using this server with another framework, you should + # trap those exceptions in whatever code block calls start(). + self._interrupt = None + + # Select the appropriate socket + if isinstance(self.bind_addr, basestring): + # AF_UNIX socket + + # So we can reuse the socket... + try: os.unlink(self.bind_addr) + except: pass + + # So everyone can access the socket... + try: os.chmod(self.bind_addr, 0777) + except: pass + + info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, socket.AI_PASSIVE) + except socket.gaierror: + # Probably a DNS issue. Assume IPv4. 
+ info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error, msg: + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error, msg + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + while self.ready: + self.tick() + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay: + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + if self.ssl_certificate and self.ssl_private_key: + if SSL is None: + raise ImportError("You must install pyOpenSSL to use HTTPS.") + + # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 + ctx = SSL.Context(SSL.SSLv23_METHOD) + ctx.use_privatekey_file(self.ssl_private_key) + ctx.use_certificate_file(self.ssl_certificate) + self.socket = SSLConnection(ctx, self.socket) + self.populate_ssl_environ() + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if not self.ready: + return + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + environ = self.environ.copy() + # SERVER_SOFTWARE is common for IIS. It's also helpful for + # us to pass a default value for the "Server" response header. + environ["SERVER_SOFTWARE"] = "%s WSGI Server" % self.version + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + environ["ACTUAL_SERVER_PROTOCOL"] = self.protocol + environ["SERVER_NAME"] = self.server_name + + if isinstance(self.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + environ["SERVER_PORT"] = "" + else: + environ["SERVER_PORT"] = str(self.bind_addr[1]) + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + environ["REMOTE_ADDR"] = addr[0] + environ["REMOTE_PORT"] = str(addr[1]) + + conn = self.ConnectionClass(s, self.wsgi_app, environ) + self.requests.put(conn) + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error, x: + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See http://www.cherrypy.org/ticket/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See http://www.cherrypy.org/ticket/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See http://www.cherrypy.org/ticket/686. 
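+ # This typically happens when stop() closes the listening socket
+ # while accept() is blocked, so treat it as a normal shutdown rather
+ # than an error.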
+ return + raise + + def _get_interrupt(self): + return self._interrupt + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. + try: + host, port = sock.getsockname()[:2] + except socket.error, x: + if x.args[1] != "Bad file descriptor": + raise + else: + # Note that we're explicitly NOT using AI_PASSIVE, + # here, because we want an actual IP to touch. + # localhost won't work if we've bound to a public IP, + # but it will if we bound to '0.0.0.0' (INADDR_ANY). + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(1.0) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + if hasattr(sock, "close"): + sock.close() + self.socket = None + + self.requests.stop(self.shutdown_timeout) + + def populate_ssl_environ(self): + """Create WSGI environ entries to be merged into each request.""" + cert = open(self.ssl_certificate, 'rb').read() + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) + ssl_environ = { + "wsgi.url_scheme": "https", + "HTTPS": "on", + # pyOpenSSL doesn't provide access to any of these AFAICT +## 'SSL_PROTOCOL': 'SSLv2', +## SSL_CIPHER string The cipher specification name +## SSL_VERSION_INTERFACE string The mod_ssl program version +## SSL_VERSION_LIBRARY string The OpenSSL program version + } + + # Server certificate attributes + ssl_environ.update({ + 'SSL_SERVER_M_VERSION': cert.get_version(), + 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), +## 'SSL_SERVER_V_START': Validity of server's certificate (start time), +## 'SSL_SERVER_V_END': Validity of server's certificate (end time), + }) + + for prefix, dn in [("I", cert.get_issuer()), + ("S", cert.get_subject())]: + # X509Name objects don't seem to have a way to get the + # complete DN string. Use str() and slice it instead, + # because str(dn) == "" + dnstr = str(dn)[18:-2] + + wsgikey = 'SSL_SERVER_%s_DN' % prefix + ssl_environ[wsgikey] = dnstr + + # The DN should be of the form: /k1=v1/k2=v2, but we must allow + # for any value to contain slashes itself (in a URL). + while dnstr: + pos = dnstr.rfind("=") + dnstr, value = dnstr[:pos], dnstr[pos + 1:] + pos = dnstr.rfind("/") + dnstr, key = dnstr[:pos], dnstr[pos + 1:] + if key and value: + wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) + ssl_environ[wsgikey] = value + + self.environ.update(ssl_environ) +
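
A minimal usage sketch of the server class added above, assuming the module is
importable as calibre.library.wsgiserver (that import path is an assumption;
only the CherryPyWSGIServer API shown in the patch is taken as given). The
hello_app callable is a throwaway WSGI application for illustration, not part
of the patch:

    # Illustrative sketch only -- not part of the patch.
    from calibre.library.wsgiserver import CherryPyWSGIServer  # assumed path

    def hello_app(environ, start_response):
        # Trivial WSGI application used to exercise the server.
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return ['Hello from the calibre content server\n']

    # Bind to all IPv4 interfaces on port 8080 ('' and None are rejected
    # by bind_addr; use '0.0.0.0' explicitly).
    server = CherryPyWSGIServer(('0.0.0.0', 8080), hello_app,
                                numthreads=10, server_name='localhost')
    try:
        server.start()      # blocks, accepting connections until stop()
    except KeyboardInterrupt:
        server.stop()

As the class docstring notes, start() does not trap KeyboardInterrupt or
SystemExit itself, so the calling code is responsible for invoking stop().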