diff --git a/Changelog.old.yaml b/Changelog.old.yaml index 0bdd7ba746..0e601874a5 100644 --- a/Changelog.old.yaml +++ b/Changelog.old.yaml @@ -1,3 +1,4213 @@ +- version: 0.7.59 + date: 2011-04-30 + + bug fixes: + - title: "Fixes a bug in 0.7.58 that caused too small fonts when converting to MOBI for the Kindle. Apologies." + + - title: "Apple driver: Handle invalid EPUBs that do not contain an OPF file" + + new recipes: + - title: The Big Picture and Auto industry news + author: welovelucy + + - title: Gazeta Prawna + author: Vroo + + - title: Various Czech news sources + author: Tomas Latal + + - title: Diario de Ibiza + author: Joan Tur + +- version: 0.7.58 + date: 2011-04-29 + + new features: + - title: "Support for converting and reading metadata from Plucker format PDB files" + type: major + + - title: "The metadata that is displayed in the book details panel on the right is now completely configurable via Preferences->Look & Feel" + + - title: "Add a column that shows the date when the metadata of a book record was last modified in calibre. To see the column, right click on the column headers in calibre and select Show column->Modified. Note that the dates may be incorrect for books added with older versions of calibre." + + - title: "Add command line option to shutdown running calibre" + + - title: "CHM Input: Store extracted files in the input/ sub dir for easy debugging when --debug-pipeline is specified" + + - title: "Add a popup menu to the 'Create saved search button' to allow easy deleting of saved searches" + + bug fixes: + - title: "Fix regression that broke converting to LIT in 0.7.57" + tickets: [769334] + + - title: "Conversion pipeline: Remove encoding declarations from input HTML documents to guarantee that there is only a single encoding declaration in the output HTML." + tickets: [773337] + + - title: "Correctly parenthesize searches that are used to make search restrictions" + + - title: "Fix ratings in save to disk templates not being divided by 2" + + - title: "TXT to EPUB: Underlined words (following quotes?) fail to become italics" + tickets: [772267] + + - title: "Fix template function source code unavailable when not running calibre from source" + + - title: "Fix adding html books from the top of a deep folder hierarchy very slow" + + - title: "Only set language in MOBI metadata if it is not null" + + - title: "Fix 'count-of' searches (e.g., tags:#>3)." + tickets: [771175] + + - title: "Fix regression that broke connection to iTunes in some cases" + tickets: [771164] + + - title: "Fix buggy regex that made converting PDFs with the string ****************** very slow" + tickets: [770534] + + - title: "Fix Ctrl+L shortcut to lookup word not working in ebook viewer" + tickets: [769492] + + - title: "Fix regression that broke searching on boolean columns" + + improved recipes: + - HBR Blogs + - The Marker + - Financial Times + - Clarin + - Honolulu Star Advertiser + + new recipes: + - title: Novi Standard + author: Darko Miletic + + - title: Autobild.ro and Social Diva + author: Silviu Cotoara + + - title: Novinky + author: Tomas Latal + + - title: "De Volksrant (subscriber version)" + author: Selcal + + +- version: 0.7.57 + date: 2011-04-22 + + new features: + - title: "Launch worker processes on demand instead of keeping a pool of them in memory. Reduces memory footprint." + + - title: "Use the visual formatting of the Table of Contents to try to automatically create a multi-level TOC when converting/viewing MOBI files." 
+ tickets: [763681] + + - title: "Add a new function booksize() to the template language to get the value of the size column in calibre." + + - title: "Add support for using metadata plugboards with the content server (only with the epub format)" + + - title: "Change default algorithm for automatically computing author sort to be more intelligent and handle the case when the author name has a comma in it" + + - title: "Show cover size in the tooltips of the book details panel and book details popup window" + + bug fixes: + - title: "Dragging and dropping a cover onto the book details panel did not change the cover size" + tickets: [768332] + + - title: "Fix non-escaped '|' when searching for commas in authors using REGEXP_MATCH" + + - title: "Fix ratings in templates being multiplied by 2" + + - title: "Fix adding a comma to custom series values when using completion." + tickets: [763788] + + - title: "CHM Input: Another workaround for a Microsoft mess." + tickets: [763336] + + - title: "Fix job count in the spinner not always being updated when a job completes" + + - title: "Changing case only of a title does not update title sort" + tickets: [768904] + + improved recipes: + - ecuisine.ro, egirl.ro and tabu.ro + - Daily Telegraph + - Handelsblatt + - Il Sole 24 Ore + - Newsweek + - Arcamax + + new recipes: + - title: BabyOnline.ro + author: Silviu Cotoara + + - title: "The Journal.ie" + author: Phil Burns + + - title: "Der Spiegel" + author: Nikolas Mangold + +- version: 0.7.56 + date: 2011-04-17 + + new features: + - title: "This is primarily a bug fix release that fixes a bug in 0.7.55 that caused calibre to rescan the files on the device every time the device is connected. If you updated to 0.7.55 it is highly recommended you update to 0.7.56" + + - title: "Device driver for Coby Kyros" + + - title: "Remove the quick access to search options from next to the search bar, as we now have a separate search highlights toggle button" + + - title: "MOBI Output: Ensure that MOBI files always have 8KB worth of null bytes at the end of record 0. This appears to be necessary for Amazon to be able to add DRM to calibre generated MOBI files sent to their publishing service." + + - title: "Add a tool to inspect MOBI files. To use: calibre-debug -m file.mobi" + + bug fixes: + - title: "Fixed regression taht caused calibre to rescan files on the device on every reconnect" + + - title: "Fix donate button causing the toolbar to be too large on OS X" + + - title: "MOBI Input: Fix detection of Table of Contents for MOBI files that have a page break between the location designated as the Table of Contents and the actual table of contents." + tickets: [763504] + + - title: "Comic Input: Fix handling of some CBZ files that have wrongly encoded non ASCII filenames on windows." + tickets: [763280] + + - title: "PML Input: Fix multi-line chapter title causing a spurious page break" + tickets: [763238] + + - title: "EPUB Input: Speed up processing of files with very large manifest/spines" + + - title: "Fix regression that broke cover:False searches in 0.7.55" + + improved recipes: + - Suedduetsche Zeitung + - Irish Times + - Big Oven + - NSPM + + +- version: 0.7.55 + date: 2011-04-15 + + new features: + - title: "Add a menu bar. Useful if you use a lot of plugins and are running out of space in your toolbars. By default the menu bar is hidden (except on OS X). You can add actions to it via Preferences->Toolbars. As soon as you add actions, it will become visible." 
+ + - title: "OS X: Make the main calibre window look a little more 'native' on OS X" + + - title: "Show recently viewed books in the View button's drop down menu" + + - title: "Add a button next to the search bar to toggle easily between highlight and restrict search modes" + + - title: "Allow the use of arbitrary searches as search restrictions, rather than just saved searches. Do this by using the special entry '*Current Search' in the Search Restriction dropdown." + + - title: "The Connect/share icon now changes color to indicate that the content server is running" + tickets: [755444] + + - title: "Device drivers for Viewpad 7, Motorola Xoom and Asus Eee Note" + + - title: "Add tags like composite custom column." + tickets: [759663] + + - title: "Add a new date format code 'iso'. Permits formatting dates to see the complete time (via Preferences->Tweaks)" + + - title: "Allow the use of data from the size column in the template language" + tickets: [759645] + + - title: "Support reading/writing covers to txtz/htmlz files" + + - title: "Speedup for large library sorting when using composite custom columns" + + - title: "Move the boolean columns are tristate tweak to Preferences->Behavior" + + bug fixes: + - title: "Fix a regression in 0.7.54 that broke reading covers/metadata from cbz files." + tickets: [756892] + + - title: "Fix tweak names and help not translatable" + tickets: [756736] + + - title: "When the size of a book is less that 0.1MB but not zero, display the size as <0.1 instead of 0.0." + tickets: [755768] + + - title: "HTMLZ input: Fix handling of HTML files encoded in an encoding other than UTF-8" + + - title: "EPUB Input: Fix EPUB files with empty Adobe PAGE templates causing conversion to abort." + tickets: [760390] + + - title: "Fix CHM input plugin not closing opened input file" + tickets: [760589] + + - title: "MOBI Output: Make super/subscripts use a slightly smaller font when rendered on a Kindle. Also allow the use of vertical-align:top/bottom in the CSS to specify a super/subscript." + tickets: [758667] + + - title: "LRF Input: Detect and workaround LRF files that have deeply nested spans, instead of crashing." + tickets: [759680] + + - title: "MOBI Output: Fix bug that would cause conversion to unneccessarily abort when malformed hyperlinks are present in the input document." + tickets: [759313] + + - title: "Make true and false searches work correctly for numeric fields." + + - title: "MOBI Output: The Ignore margins setting no longer ignores blockquotes, only margins set via CSS on other elements." + tickets: [758675] + + - title: "Fix regression that caused clicking auto send to also change the email address in Preferences->Email" + + improved recipes: + - Wall Street Journal + - Weblogs SL + - Tabu.ro + - Vecernje Novosti + + new recipes: + - title: Hallo Assen and Dvhn + author: Reijendert + + +- version: 0.7.54 + date: 2011-04-08 + + new features: + - title: "New output format, HTMLZ which is a single HTML file with its associated images/stylesheets in a zipped up file" + description: "Useful when you want to convert your ebook into a single HTML file for easy editing. Note that this output plugin is still new and needs testing" + + - title: "When dealing with ZIP/RAR archives, use the file header rather than the file extension to detrmine the file type, when possible. 
This fixes the common case of CBZ files being actually cbr files and vice versa" + + - title: "Support for the Motorola Atrix" + + - title: "Allow the icons in the toolbar to be turned off completely via Preferences->Look & Feel" + + - title: "When downloading metadata use the gzip transfer encoding when possible for a speedup." + tickets: [749304] + + bug fixes: + - title: "Conversion pipeline: Workaround for bug in lxml that causes a massive mem leak on windows and OS X when the input document contains non ASCII CSS selectors." + tickets: [754555] + + - title: "Conversion pipeline: Handle inline + + +
+ Testing cfi.coffee
+
+ Click anywhere and the location will be marked with a marker, whose position is set via a CFI.
+ Reset CFI to None
+
+ A div with scrollbars
+ Scroll down and click on some elements. Make sure to hit both bold and not bold text as well as different points on the image.
+ But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee
+ Test Image
+
+ Some entities and comments
+ Entities: & © § > some text after entities
+ An invisible Comment: followed by some text
+ An invalid (in HTML) CDATA: followed by some text
+
+ Margins padding borders
+ Try clicking in the margins, borders and padding. CFI calculation should fail.
+ (the same filler paragraph as above)
+
+ Lots of collapsed whitespace
+ Try clicking the A character after the colon: A suffix
+
+ Lots of nested/sibling tags
+ A bunch of nested and sibling tags, all mixed together. Click all over this paragraph to test things.
+
+ Images
+ Try clicking at different points along the image. Also try changing the magnification and then hitting reload.
+ Test Image
+
+ Iframes
+ Try clicking anywhere in the iframe below:
+
+ Video
+ Try clicking on this video while it is playing. The page should reload with the video paused at the point it was at when you clicked. To play the video you should right click on it and select play (otherwise the click will cause a reload).
+ + + + + diff --git a/src/calibre/ebooks/oeb/display/test-cfi/marker.png b/src/calibre/ebooks/oeb/display/test-cfi/marker.png new file mode 100644 index 0000000000..6dcc1fb7ba Binary files /dev/null and b/src/calibre/ebooks/oeb/display/test-cfi/marker.png differ diff --git a/src/calibre/ebooks/oeb/display/test/test.py b/src/calibre/ebooks/oeb/display/test-cfi/run.py similarity index 67% rename from src/calibre/ebooks/oeb/display/test/test.py rename to src/calibre/ebooks/oeb/display/test-cfi/run.py index 568cffe5e6..17af70bf22 100644 --- a/src/calibre/ebooks/oeb/display/test/test.py +++ b/src/calibre/ebooks/oeb/display/test-cfi/run.py @@ -10,16 +10,15 @@ __docformat__ = 'restructuredtext en' import os try: - from calibre.utils.coffeescript import serve + from calibre.utils.serve_coffee import serve except ImportError: import init_calibre if False: init_calibre, serve - from calibre.utils.coffeescript import serve - + from calibre.utils.serve_coffee import serve def run_devel_server(): - os.chdir(os.path.dirname(__file__)) - serve(['../cfi.coffee', 'cfi-test.coffee']) + os.chdir(os.path.dirname(os.path.abspath(__file__))) + serve(resources={'cfi.coffee':'../cfi.coffee', '/':'index.html'}) if __name__ == '__main__': run_devel_server() diff --git a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee b/src/calibre/ebooks/oeb/display/test/cfi-test.coffee deleted file mode 100644 index 056d24b396..0000000000 --- a/src/calibre/ebooks/oeb/display/test/cfi-test.coffee +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env coffee -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -### - Copyright 2011, Kovid Goyal - Released under the GPLv3 License -### - -viewport_top = (node) -> - $(node).offset().top - window.pageYOffset - -viewport_left = (node) -> - $(node).offset().left - window.pageXOffset - -window.onload = -> - h1 = document.getElementsByTagName('h1')[0] - x = h1.scrollLeft + 150 - y = viewport_top(h1) + h1.offsetHeight/2 - e = document.elementFromPoint x, y - if e.getAttribute('id') != 'first-h1' - alert 'Failed to find top h1' - return - alert window.cfi.at x, y - diff --git a/src/calibre/ebooks/oeb/display/test/test.html b/src/calibre/ebooks/oeb/display/test/test.html deleted file mode 100644 index 71dac39249..0000000000 --- a/src/calibre/ebooks/oeb/display/test/test.html +++ /dev/null @@ -1,14 +0,0 @@ - - - - Testing CFI functionality - - - - - -

- Testing CFI functionality
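
For context on what the new test-cfi page above exercises: cfi.coffee maps a click location in the rendered document to an EPUB Canonical Fragment Identifier, a path of numbered steps down from the document root. Below is a minimal sketch of just the element-numbering part of that scheme; it is not calibre's implementation, and the helper name and sample markup are invented purely for illustration.

    from lxml import etree

    def cfi_element_steps(elem):
        """Return the CFI element-step path for elem relative to the document root.

        In the EPUB CFI scheme, child elements are numbered 2, 4, 6, ... in
        document order, and an element's id becomes an assertion, e.g. /4[target].
        Text-node steps, character offsets and the '!' indirection into a spine
        item are left out of this sketch.
        """
        steps = []
        node, parent = elem, elem.getparent()
        while parent is not None:
            # Only element children count; comments and processing
            # instructions (whose .tag is not a string) are skipped.
            element_children = [c for c in parent if isinstance(c.tag, str)]
            num = 2 * (element_children.index(node) + 1)
            assertion = '[%s]' % node.get('id') if node.get('id') else ''
            steps.append('/%d%s' % (num, assertion))
            node, parent = parent, parent.getparent()
        return ''.join(reversed(steps))

    root = etree.fromstring(
        '<html><head/><body><p>one</p><p id="target">two</p></body></html>')
    print(cfi_element_steps(root.find('.//p[@id="target"]')))  # prints /4/4[target]

A complete CFI for a book also includes a step through the spine and usually a character offset (for example /6/4!/4/10/2:3), which is the kind of value the test page stores to position its marker.
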

- - - - diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index 57dc18bc32..f0b48afb39 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -17,6 +17,7 @@ from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True) XHTML_NS = 'http://www.w3.org/1999/xhtml' +XMLNS_NS = 'http://www.w3.org/2000/xmlns/' class NotHTML(Exception): @@ -28,9 +29,7 @@ def barename(name): return name.rpartition('}')[-1] def namespace(name): - if '}' in name: - return name.split('}', 1)[0][1:] - return '' + return name.rpartition('}')[0][1:] def XHTML(name): return '{%s}%s' % (XHTML_NS, name) @@ -60,26 +59,106 @@ def merge_multiple_html_heads_and_bodies(root, log=None): log.warn('Merging multiple and sections') return root -def _html5_parse(data): +def clone_element(elem, nsmap={}, in_context=True): + if in_context: + maker = elem.getroottree().getroot().makeelement + else: + maker = etree.Element + nelem = maker(elem.tag, attrib=elem.attrib, + nsmap=nsmap) + nelem.text, nelem.tail = elem.text, elem.tail + nelem.extend(elem) + return nelem + +def node_depth(node): + ans = 0 + p = node.getparent() + while p is not None: + ans += 1 + p = p.getparent() + return ans + +def html5_parse(data, max_nesting_depth=100): import html5lib + # html5lib bug: http://code.google.com/p/html5lib/issues/detail?id=195 + data = re.sub(r'<\s*title\s*[^>]*/\s*>', '', data) + data = html5lib.parse(data, treebuilder='lxml').getroot() - html_ns = [ns for ns, val in data.nsmap.iteritems() if (val == XHTML_NS and - ns is not None)] - if html_ns: - # html5lib causes the XHTML namespace to not - # be set as the default namespace - nsmap = dict(data.nsmap) - nsmap[None] = XHTML_NS - for x in html_ns: - nsmap.pop(x) - nroot = etree.Element(data.tag, nsmap=nsmap, - attrib=dict(data.attrib)) - nroot.text = data.text - nroot.tail = data.tail - for child in data: - nroot.append(child) - data = nroot - return data + + # Check that the asinine HTML 5 algorithm did not result in a tree with + # insane nesting depths + for x in data.iterdescendants(): + if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node + depth = node_depth(x) + if depth > max_nesting_depth: + raise ValueError('html5lib resulted in a tree with nesting' + ' depth > %d'%max_nesting_depth) + # Set lang correctly + xl = data.attrib.pop('xmlU0003Alang', None) + if xl is not None and 'lang' not in data.attrib: + data.attrib['lang'] = xl + + # html5lib has the most inelegant handling of namespaces I have ever seen + # Try to reconstitute destroyed namespace info + xmlns_declaration = '{%s}'%XMLNS_NS + non_html5_namespaces = {} + seen_namespaces = set() + for elem in tuple(data.iter(tag=etree.Element)): + elem.attrib.pop('xmlns', None) + namespaces = {} + for x in tuple(elem.attrib): + if x.startswith('xmlnsU') or x.startswith(xmlns_declaration): + # A namespace declaration + val = elem.attrib.pop(x) + if x.startswith('xmlnsU0003A'): + prefix = x[11:] + namespaces[prefix] = val + + if namespaces: + # Some destroyed namespace declarations were found + p = elem.getparent() + if p is None: + # We handle the root node later + non_html5_namespaces = namespaces + else: + idx = p.index(elem) + p.remove(elem) + elem = clone_element(elem, nsmap=namespaces) + p.insert(idx, elem) + + b = barename(elem.tag) + idx = b.find('U0003A') + if idx > -1: + prefix, tag = b[:idx], b[idx+6:] + ns = elem.nsmap.get(prefix, None) + if ns is None: 
+ ns = non_html5_namespaces.get(prefix, None) + if ns is not None: + elem.tag = '{%s}%s'%(ns, tag) + + for b in tuple(elem.attrib): + idx = b.find('U0003A') + if idx > -1: + prefix, tag = b[:idx], b[idx+6:] + ns = elem.nsmap.get(prefix, None) + if ns is None: + ns = non_html5_namespaces.get(prefix, None) + if ns is not None: + elem.attrib['{%s}%s'%(ns, tag)] = elem.attrib.pop(b) + + seen_namespaces |= set(elem.nsmap.itervalues()) + + nsmap = dict(html5lib.constants.namespaces) + nsmap[None] = nsmap.pop('html') + non_html5_namespaces.update(nsmap) + nsmap = non_html5_namespaces + + data = clone_element(data, nsmap=nsmap, in_context=False) + + # Remove unused namespace declarations + fnsmap = {k:v for k,v in nsmap.iteritems() if v in seen_namespaces and v != + XMLNS_NS} + return clone_element(data, nsmap=fnsmap, in_context=False) def _html4_parse(data, prefer_soup=False): if prefer_soup: @@ -177,7 +256,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, except etree.XMLSyntaxError: log.debug('Parsing %s as HTML' % filename) try: - data = _html5_parse(data) + data = html5_parse(data) except: log.exception( 'HTML 5 parsing failed, falling back to older parsers') @@ -261,6 +340,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, nroot.append(elem) data = nroot + data = merge_multiple_html_heads_and_bodies(data, log) # Ensure has a head = xpath(data, '/h:html/h:head') diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 0337d47f92..6b2cf798ea 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -175,13 +175,27 @@ class OEBReader(object): manifest = self.oeb.manifest known = set(manifest.hrefs) unchecked = set(manifest.values()) + cdoc = OEB_DOCS|OEB_STYLES + invalid = set() while unchecked: new = set() for item in unchecked: + data = None + if (item.media_type in cdoc or + item.media_type[-4:] in ('/xml', '+xml')): + try: + data = item.data + except: + self.oeb.log.exception(u'Failed to read from manifest ' + u'entry with id: %s, ignoring'%item.id) + invalid.add(item) + continue + if data is None: + continue + if (item.media_type in OEB_DOCS or - item.media_type[-4:] in ('/xml', '+xml')) and \ - item.data is not None: - hrefs = [r[2] for r in iterlinks(item.data)] + item.media_type[-4:] in ('/xml', '+xml')): + hrefs = [r[2] for r in iterlinks(data)] for href in hrefs: href, _ = urldefrag(href) if not href: @@ -197,7 +211,7 @@ class OEBReader(object): new.add(href) elif item.media_type in OEB_STYLES: try: - urls = list(cssutils.getUrls(item.data)) + urls = list(cssutils.getUrls(data)) except: urls = [] for url in urls: @@ -231,6 +245,9 @@ class OEBReader(object): added = manifest.add(id, href, media_type) unchecked.add(added) + for item in invalid: + self.oeb.manifest.remove(item) + def _manifest_from_opf(self, opf): manifest = self.oeb.manifest for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): diff --git a/src/calibre/ebooks/pdf/output.py b/src/calibre/ebooks/pdf/output.py index 0ea1c52e2e..14dd27368c 100644 --- a/src/calibre/ebooks/pdf/output.py +++ b/src/calibre/ebooks/pdf/output.py @@ -25,31 +25,30 @@ class PDFOutput(OutputFormatPlugin): file_type = 'pdf' options = set([ - OptionRecommendation(name='unit', recommended_value='inch', - level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(), - help=_('The unit of measure. Default is inch. 
Choices ' - 'are %s ' - 'Note: This does not override the unit for margins!') % UNITS.keys()), - OptionRecommendation(name='paper_size', recommended_value='letter', - level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(), - help=_('The size of the paper. This size will be overridden when an ' - 'output profile is used. Default is letter. Choices ' - 'are %s') % PAPER_SIZES.keys()), - OptionRecommendation(name='custom_size', recommended_value=None, - help=_('Custom size of the document. Use the form widthxheight ' - 'EG. `123x321` to specify the width and height. ' - 'This overrides any specified paper-size.')), - OptionRecommendation(name='orientation', recommended_value='portrait', - level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(), - help=_('The orientation of the page. Default is portrait. Choices ' - 'are %s') % ORIENTATIONS.keys()), - OptionRecommendation(name='preserve_cover_aspect_ratio', - recommended_value=False, - help=_('Preserve the aspect ratio of the cover, instead' - ' of stretching it to fill the full first page of the' - ' generated pdf.') - ), - ]) + OptionRecommendation(name='unit', recommended_value='inch', + level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(), + help=_('The unit of measure. Default is inch. Choices ' + 'are %s ' + 'Note: This does not override the unit for margins!') % UNITS.keys()), + OptionRecommendation(name='paper_size', recommended_value='letter', + level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(), + help=_('The size of the paper. This size will be overridden when a ' + 'non default output profile is used. Default is letter. Choices ' + 'are %s') % PAPER_SIZES.keys()), + OptionRecommendation(name='custom_size', recommended_value=None, + help=_('Custom size of the document. Use the form widthxheight ' + 'EG. `123x321` to specify the width and height. ' + 'This overrides any specified paper-size.')), + OptionRecommendation(name='orientation', recommended_value='portrait', + level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(), + help=_('The orientation of the page. Default is portrait. 
Choices ' + 'are %s') % ORIENTATIONS.keys()), + OptionRecommendation(name='preserve_cover_aspect_ratio', + recommended_value=False, + help=_('Preserve the aspect ratio of the cover, instead' + ' of stretching it to fill the full first page of the' + ' generated pdf.')), + ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): self.oeb = oeb_book diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 632ccf230a..c02f972274 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -48,7 +48,7 @@ def get_pdf_printer(opts, for_comic=False): custom_size = get_custom_size(opts) if opts.output_profile.short_name == 'default' or \ - opts.output_profile.width > 10000: + opts.output_profile.width > 9999: if custom_size is None: printer.setPaperSize(paper_size(opts.paper_size)) else: diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py index 91cd457c27..4d59668b12 100644 --- a/src/calibre/ebooks/pml/input.py +++ b/src/calibre/ebooks/pml/input.py @@ -47,7 +47,8 @@ class PMLInput(InputFormatPlugin): self.log.debug('Converting PML to HTML...') hizer = PML_HTMLizer() html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path) - html_stream.write('</head><body>%s</body></html>' % html.encode('utf-8', 'replace')) + html = '<html><head><title>%s'%html + html_stream.write(html.encode('utf-8', 'replace')) if pclose: pml_stream.close() diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 7bb23946ca..d4f409d3cc 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -80,7 +80,7 @@ class PML_HTMLizer(object): 'b': ('', ''), 'l': ('', ''), 'k': ('', ''), - 'FN': ('

…', '… <return
'), + 'FN': ('…', '… return
'), 'SB': ('…', '… return
'), } @@ -143,7 +143,7 @@ class PML_HTMLizer(object): 'd', 'b', ] - + NEW_LINE_EXCHANGE_STATES = { 'h1': 'h1c', } @@ -230,7 +230,7 @@ class PML_HTMLizer(object): div = [] span = [] other = [] - + for key, val in state.items(): if key in self.NEW_LINE_EXCHANGE_STATES and val[0]: state[self.NEW_LINE_EXCHANGE_STATES[key]] = val @@ -644,7 +644,7 @@ class PML_HTMLizer(object): empty_count = 0 text = self.end_line() parsed.append(text) - + # Basic indent will be set if the \t starts the line or # if we are in a continuing \t block. if basic_indent: @@ -666,7 +666,7 @@ class PML_HTMLizer(object): parsed.append(self.STATES_TAGS['T'][1]) indent_state['T'] = False adv_indent_val = '' - + output.append(u''.join(parsed)) line.close() @@ -677,7 +677,7 @@ class PML_HTMLizer(object): def get_toc(self): ''' Toc can have up to 5 levels, 0 - 4 inclusive. - + This function will add items to their appropriate depth in the TOC tree. If the specified depth is invalid (item would not have a valid parent) add diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 99f7035800..49c8a2129d 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin): name = 'TXT Input' author = 'John Schember' description = 'Convert TXT files to HTML' - file_types = set(['txt', 'txtz', 'text']) + file_types = set(['txt', 'txtz', 'text', 'md', 'textile', 'markdown']) options = set([ OptionRecommendation(name='paragraph_type', recommended_value='auto', @@ -77,6 +77,11 @@ class TXTInput(InputFormatPlugin): txt += tf.read() + '\n\n' else: txt = stream.read() + if file_ext in {'md', 'textile', 'markdown'}: + options.formatting_type = {'md': 'markdown'}.get(file_ext, file_ext) + log.info('File extension indicates particular formatting. ' + 'Forcing formatting type to: %s'%options.formatting_type) + options.paragraph_type = 'off' # Get the encoding of the document. 
if options.input_encoding: diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 47d7748eb9..2b99bcb2c2 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -132,7 +132,7 @@ def _config(): # {{{ c.add_opt('LRF_ebook_viewer_options', default=None, help=_('Options for the LRF ebook viewer')) c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT', - 'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB'], + 'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'], help=_('Formats that are viewed using the internal viewer')) c.add_opt('column_map', default=ALL_COLUMNS, help=_('Columns to be displayed in the book list')) @@ -273,11 +273,34 @@ def error_dialog(parent, title, msg, det_msg='', show=False, return d def question_dialog(parent, title, msg, det_msg='', show_copy_button=False, - default_yes=True): + default_yes=True, + # Skippable dialogs + # Set skip_dialog_name to a unique name for this dialog + # Set skip_dialog_msg to a message displayed to the user + skip_dialog_name=None, skip_dialog_msg=_('Show this confirmation again'), + skip_dialog_skipped_value=True, skip_dialog_skip_precheck=True): from calibre.gui2.dialogs.message_box import MessageBox + + auto_skip = set(gprefs.get('questions_to_auto_skip', [])) + if (skip_dialog_name is not None and skip_dialog_name in auto_skip): + return bool(skip_dialog_skipped_value) + d = MessageBox(MessageBox.QUESTION, title, msg, det_msg, parent=parent, show_copy_button=show_copy_button, default_yes=default_yes) - return d.exec_() == d.Accepted + + if skip_dialog_name is not None and skip_dialog_msg: + tc = d.toggle_checkbox + tc.setVisible(True) + tc.setText(skip_dialog_msg) + tc.setChecked(bool(skip_dialog_skip_precheck)) + + ret = d.exec_() == d.Accepted + + if skip_dialog_name is not None and not d.toggle_checkbox.isChecked(): + auto_skip.add(skip_dialog_name) + gprefs.set('questions_to_auto_skip', list(auto_skip)) + + return ret def info_dialog(parent, title, msg, det_msg='', show=False, show_copy_button=True): diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index 8c515d4c3b..03a2e44c6e 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -91,7 +91,7 @@ class AddAction(InterfaceAction): _('Are you sure'), _('Are you sure you want to add the same' ' files to all %d books? 
If the format' - 'already exists for a book, it will be replaced.')%len(ids)): + ' already exists for a book, it will be replaced.')%len(ids)): return books = choose_files(self.gui, 'add formats dialog dir', diff --git a/src/calibre/gui2/actions/copy_to_library.py b/src/calibre/gui2/actions/copy_to_library.py index 383b3e54d8..dd25b83d50 100644 --- a/src/calibre/gui2/actions/copy_to_library.py +++ b/src/calibre/gui2/actions/copy_to_library.py @@ -175,7 +175,6 @@ class CopyToLibraryAction(InterfaceAction): return error_dialog(self.gui, _('No library'), _('No library found at %s')%loc, show=True) - self.pd = ProgressDialog(_('Copying'), min=0, max=len(ids)-1, parent=self.gui, cancelable=False) diff --git a/src/calibre/gui2/actions/tweak_epub.py b/src/calibre/gui2/actions/tweak_epub.py index d5ee346d31..02fc327f38 100755 --- a/src/calibre/gui2/actions/tweak_epub.py +++ b/src/calibre/gui2/actions/tweak_epub.py @@ -10,12 +10,13 @@ import os from calibre.gui2 import error_dialog from calibre.gui2.actions import InterfaceAction from calibre.gui2.dialogs.tweak_epub import TweakEpub +from calibre.utils.config import tweaks class TweakEpubAction(InterfaceAction): name = 'Tweak ePub' - action_spec = (_('Tweak ePub'), 'trim.png', - _('Make small changes to ePub format books'), + action_spec = (_('Tweak Book'), 'trim.png', + _('Make small changes to ePub or HTMLZ format books'), _('T')) dont_add_to = frozenset(['context-menu-device']) action_type = 'current' @@ -26,33 +27,48 @@ class TweakEpubAction(InterfaceAction): def edit_epub_in_situ(self, *args): row = self.gui.library_view.currentIndex() if not row.isValid(): - return error_dialog(self.gui, _('Cannot tweak ePub'), + return error_dialog(self.gui, _('Cannot tweak Book'), _('No book selected'), show=True) - # Confirm 'EPUB' in formats book_id = self.gui.library_view.model().id(row) + + # Confirm 'EPUB' in formats try: path_to_epub = self.gui.library_view.model().db.format( book_id, 'EPUB', index_is_id=True, as_path=True) except: path_to_epub = None - if not path_to_epub: - return error_dialog(self.gui, _('Cannot tweak ePub'), - _('No ePub available. First convert the book to ePub.'), + # Confirm 'HTMLZ' in formats + try: + path_to_htmlz = self.gui.library_view.model().db.format( + book_id, 'HTMLZ', index_is_id=True, as_path=True) + except: + path_to_htmlz = None + + if not path_to_epub and not path_to_htmlz: + return error_dialog(self.gui, _('Cannot tweak Book'), + _('The book must be in ePub or HTMLZ format to tweak.' 
+ '\n\nFirst convert the book to ePub or HTMLZ.'), show=True) # Launch modal dialog waiting for user to tweak or cancel - dlg = TweakEpub(self.gui, path_to_epub) + if tweaks['tweak_book_prefer'] == 'htmlz': + path_to_book = path_to_htmlz or path_to_epub + else: + path_to_book = path_to_epub or path_to_htmlz + + dlg = TweakEpub(self.gui, path_to_book) if dlg.exec_() == dlg.Accepted: self.update_db(book_id, dlg._output) dlg.cleanup() - os.remove(path_to_epub) + os.remove(path_to_book) def update_db(self, book_id, rebuilt): ''' Update the calibre db with the tweaked epub ''' - self.gui.library_view.model().db.add_format(book_id, 'EPUB', + fmt = os.path.splitext(rebuilt)[1][1:].upper() + self.gui.library_view.model().db.add_format(book_id, fmt, open(rebuilt, 'rb'), index_is_id=True) diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py index 84060de786..43e9dad5c4 100644 --- a/src/calibre/gui2/actions/view.py +++ b/src/calibre/gui2/actions/view.py @@ -14,7 +14,7 @@ from calibre.constants import isosx from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \ open_local_file, info_dialog from calibre.gui2.dialogs.choose_format import ChooseFormatDialog -from calibre.utils.config import prefs +from calibre.utils.config import prefs, tweaks from calibre.ptempfile import PersistentTemporaryFile from calibre.gui2.actions import InterfaceAction @@ -239,6 +239,7 @@ class ViewAction(InterfaceAction): def update_history(self, views, remove=frozenset()): db = self.gui.current_db + vh = tweaks['gui_view_history_size'] if views: seen = set() history = [] @@ -247,12 +248,12 @@ class ViewAction(InterfaceAction): seen.add(title) history.append((id_, title)) - db.prefs['gui_view_history'] = history[:10] + db.prefs['gui_view_history'] = history[:vh] self.build_menus(db) if remove: history = db.prefs.get('gui_view_history', []) history = [x for x in history if x[0] not in remove] - db.prefs['gui_view_history'] = history[:10] + db.prefs['gui_view_history'] = history[:vh] self.build_menus(db) def _view_books(self, rows): diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 2fc14c8238..7cdac3b845 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -14,7 +14,7 @@ from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata import MetaInformation from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG from calibre.utils.config import prefs -from calibre import prints +from calibre import prints, force_unicode, as_unicode single_shot = partial(QTimer.singleShot, 75) @@ -66,7 +66,8 @@ class RecursiveFind(QThread): # {{{ if self.canceled: return self.update.emit( - _('Searching in')+' '+dirpath[0]) + _('Searching in')+' '+force_unicode(dirpath[0], + filesystem_encoding)) self.books += list(self.db.find_books_in_directory(dirpath[0], self.single_book_per_directory)) @@ -82,10 +83,7 @@ class RecursiveFind(QThread): # {{{ except Exception as err: import traceback traceback.print_exc() - try: - msg = unicode(err) - except: - msg = repr(err) + msg = as_unicode(err) self.found.emit(msg) return diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py index bdcf9ede05..73b478ac47 100644 --- a/src/calibre/gui2/convert/__init__.py +++ b/src/calibre/gui2/convert/__init__.py @@ -43,6 +43,9 @@ class Widget(QWidget): ICON = I('config.png') HELP = '' COMMIT_NAME = None + # If True, leading and trailing spaces are removed from line and text edit + # fields + STRIP_TEXT_FIELDS = True 
changed_signal = pyqtSignal() set_help = pyqtSignal(object) @@ -77,7 +80,6 @@ class Widget(QWidget): self._options, only_existing=True) defaults.update(specifics) - self.apply_recommendations(defaults) self.setup_help(get_help) @@ -124,7 +126,6 @@ class Widget(QWidget): if name in getattr(recs, 'disabled_options', []): gui_opt.setDisabled(True) - def get_value(self, g): from calibre.gui2.convert.xpath_wizard import XPathEdit from calibre.gui2.convert.regex_builder import RegexEdit @@ -136,7 +137,9 @@ class Widget(QWidget): return g.value() elif isinstance(g, (QLineEdit, QTextEdit)): func = getattr(g, 'toPlainText', getattr(g, 'text', None))() - ans = unicode(func).strip() + ans = unicode(func) + if self.STRIP_TEXT_FIELDS: + ans = ans.strip() if not ans: ans = None return ans diff --git a/src/calibre/gui2/convert/pdf_output.py b/src/calibre/gui2/convert/pdf_output.py index 1c526939c2..940d6b68f8 100644 --- a/src/calibre/gui2/convert/pdf_output.py +++ b/src/calibre/gui2/convert/pdf_output.py @@ -18,14 +18,14 @@ class PluginWidget(Widget, Ui_Form): ICON = I('mimetypes/pdf.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): - Widget.__init__(self, parent, ['paper_size', + Widget.__init__(self, parent, ['paper_size', 'custom_size', 'orientation', 'preserve_cover_aspect_ratio']) self.db, self.book_id = db, book_id - + for x in get_option('paper_size').option.choices: self.opt_paper_size.addItem(x) for x in get_option('orientation').option.choices: self.opt_orientation.addItem(x) - + self.initialize_options(get_option, get_help, db, book_id) - \ No newline at end of file + diff --git a/src/calibre/gui2/convert/pdf_output.ui b/src/calibre/gui2/convert/pdf_output.ui index 0adb8df495..738e75aff5 100644 --- a/src/calibre/gui2/convert/pdf_output.ui +++ b/src/calibre/gui2/convert/pdf_output.ui @@ -40,7 +40,7 @@ - + Qt::Vertical @@ -53,13 +53,26 @@ - + Preserve &aspect ratio of cover + + + + &Custom size: + + + opt_custom_size + + + + + + diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index 407e7922e7..b9e2644008 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -6,8 +6,6 @@ __docformat__ = 'restructuredtext en' import re -from PyQt4.Qt import QLineEdit, QTextEdit - from calibre.gui2.convert.search_and_replace_ui import Ui_Form from calibre.gui2.convert import Widget from calibre.gui2 import error_dialog @@ -18,6 +16,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form): HELP = _('Modify the document text and structure using user defined patterns.') COMMIT_NAME = 'search_and_replace' ICON = I('search.png') + STRIP_TEXT_FIELDS = False def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, @@ -74,13 +73,5 @@ class SearchAndReplaceWidget(Widget, Ui_Form): _('Invalid regular expression: %s')%err, show=True) return False return True - - def get_vaule(self, g): - if isinstance(g, (QLineEdit, QTextEdit)): - func = getattr(g, 'toPlainText', getattr(g, 'text', None))() - ans = unicode(func) - if not ans: - ans = None - return ans - else: - return Widget.get_value(self, g) + + diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py index 486f2c540d..76fdc62d68 100644 --- a/src/calibre/gui2/custom_column_widgets.py +++ b/src/calibre/gui2/custom_column_widgets.py @@ -10,12 +10,13 @@ from functools import partial from PyQt4.Qt import QComboBox, QLabel, QSpinBox, QDoubleSpinBox, 
QDateTimeEdit, \ QDateTime, QGroupBox, QVBoxLayout, QSizePolicy, QGridLayout, \ QSpacerItem, QIcon, QCheckBox, QWidget, QHBoxLayout, SIGNAL, \ - QPushButton + QPushButton, QMessageBox, QToolButton from calibre.utils.date import qt_to_dt, now from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox from calibre.gui2.comments_editor import Editor as CommentsEditor from calibre.gui2 import UNDEFINED_QDATETIME, error_dialog +from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.utils.config import tweaks from calibre.utils.icu import sort_key from calibre.library.comments import comments_to_html @@ -226,18 +227,71 @@ class Comments(Base): val = None return val +class MultipleWidget(QWidget): + + def __init__(self, parent): + QWidget.__init__(self, parent) + layout = QHBoxLayout() + layout.setSpacing(5) + layout.setContentsMargins(0, 0, 0, 0) + + self.tags_box = MultiCompleteLineEdit(parent) + layout.addWidget(self.tags_box, stretch=1000) + self.editor_button = QToolButton(self) + self.editor_button.setToolTip(_('Open Item Editor')) + self.editor_button.setIcon(QIcon(I('chapters.png'))) + layout.addWidget(self.editor_button) + self.setLayout(layout) + + def get_editor_button(self): + return self.editor_button + + def update_items_cache(self, values): + self.tags_box.update_items_cache(values) + + def clear(self): + self.tags_box.clear() + + def setEditText(self): + self.tags_box.setEditText() + + def addItem(self, itm): + self.tags_box.addItem(itm) + + def set_separator(self, sep): + self.tags_box.set_separator(sep) + + def set_add_separator(self, sep): + self.tags_box.set_add_separator(sep) + + def set_space_before_sep(self, v): + self.tags_box.set_space_before_sep(v) + + def setSizePolicy(self, v1, v2): + self.tags_box.setSizePolicy(v1, v2) + + def setText(self, v): + self.tags_box.setText(v) + + def text(self): + return self.tags_box.text() + class Text(Base): def setup_ui(self, parent): self.sep = self.col_metadata['multiple_seps'] + self.key = self.db.field_metadata.label_to_key(self.col_metadata['label'], + prefer_custom=True) + self.parent = parent if self.col_metadata['is_multiple']: - w = MultiCompleteLineEdit(parent) + w = MultipleWidget(parent) w.set_separator(self.sep['ui_to_list']) if self.sep['ui_to_list'] == '&': w.set_space_before_sep(True) w.set_add_separator(tweaks['authors_completer_append_separator']) w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred) + w.get_editor_button().clicked.connect(self.edit) else: w = MultiCompleteComboBox(parent) w.set_separator(None) @@ -248,9 +302,12 @@ class Text(Base): def initialize(self, book_id): values = list(self.db.all_custom(num=self.col_id)) values.sort(key=sort_key) + self.book_id = book_id self.widgets[1].clear() self.widgets[1].update_items_cache(values) val = self.db.get_custom(book_id, num=self.col_id, index_is_id=True) + if isinstance(val, list): + val.sort(key=sort_key) self.initial_val = val val = self.normalize_db_val(val) @@ -284,6 +341,31 @@ class Text(Base): val = None return val + def _save_dialog(self, parent, title, msg, det_msg=''): + d = QMessageBox(parent) + d.setWindowTitle(title) + d.setText(msg) + d.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel) + return d.exec_() + + def edit(self): + if self.getter() != self.initial_val: + d = self._save_dialog(self.parent, _('Values changed'), + _('You have changed the values. In order to use this ' + 'editor, you must either discard or apply these ' + 'changes. 
Apply changes?')) + if d == QMessageBox.Cancel: + return + if d == QMessageBox.Yes: + self.commit(self.book_id) + self.db.commit() + self.initial_val = self.getter() + else: + self.setter(self.initial_val) + d = TagEditor(self.parent, self.db, self.book_id, self.key) + if d.exec_() == TagEditor.Accepted: + self.setter(d.tags) + class Series(Base): def setup_ui(self, parent): diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 51e4a1a8c1..70640444f4 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -683,7 +683,7 @@ class DeviceMixin(object): # {{{ return self.ask_a_yes_no_question( _('No suitable formats'), msg, ans_when_user_unavailable=True, - det_msg=autos + det_msg=autos, skip_dialog_name='auto_convert_before_send' ) def set_default_thumbnail(self, height): @@ -751,7 +751,7 @@ class DeviceMixin(object): # {{{ error_dialog(self, _('Error talking to device'), _('There was a temporary error talking to the ' 'device. Please unplug and reconnect the device ' - 'and or reboot.')).show() + 'or reboot.')).show() return except: pass diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py index 1705839bb4..f3c1022ba7 100644 --- a/src/calibre/gui2/dialogs/check_library.py +++ b/src/calibre/gui2/dialogs/check_library.py @@ -5,9 +5,10 @@ __license__ = 'GPL v3' import os, shutil -from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \ - QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \ - QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit +from PyQt4.Qt import (QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, + QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, + QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit, + QSplitter, QWidget) from calibre.gui2.dialogs.confirm_delete import confirm from calibre.library.check_library import CheckLibrary, CHECKS @@ -149,11 +150,15 @@ class CheckLibraryDialog(QDialog): self.setWindowIcon(QIcon(I('debug.png'))) self._tl = QHBoxLayout() - self._layout = QVBoxLayout() self.setLayout(self._tl) - self._tl.addLayout(self._layout) + self.splitter = QSplitter(self) + self.left = QWidget(self) + self.splitter.addWidget(self.left) self.helpw = QTextEdit(self) - self._tl.addWidget(self.helpw) + self.splitter.addWidget(self.helpw) + self._tl.addWidget(self.splitter) + self._layout = QVBoxLayout() + self.left.setLayout(self._layout) self.helpw.setReadOnly(True) self.helpw.setText(_('''\

Help

diff --git a/src/calibre/gui2/dialogs/message_box.py b/src/calibre/gui2/dialogs/message_box.py index 7fd7a0addf..84e5569a88 100644 --- a/src/calibre/gui2/dialogs/message_box.py +++ b/src/calibre/gui2/dialogs/message_box.py @@ -44,6 +44,7 @@ class MessageBox(QDialog, Ui_Dialog): # {{{ self.msg.setText(msg) self.det_msg.setPlainText(det_msg) self.det_msg.setVisible(False) + self.toggle_checkbox.setVisible(False) if show_copy_button: self.ctc_button = self.bb.addButton(_('&Copy to clipboard'), diff --git a/src/calibre/gui2/dialogs/message_box.ui b/src/calibre/gui2/dialogs/message_box.ui index 136e6d250e..8064eac256 100644 --- a/src/calibre/gui2/dialogs/message_box.ui +++ b/src/calibre/gui2/dialogs/message_box.ui @@ -53,7 +53,7 @@
- + Qt::Horizontal @@ -63,6 +63,13 @@ + + + + + + + diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui index 9309e9a41d..c9467ffaf0 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.ui +++ b/src/calibre/gui2/dialogs/metadata_bulk.ui @@ -847,7 +847,7 @@ Future conversion of these books will use the default settings. - Enter the what you are looking for, either plain text or a regular expression, depending on the mode + Enter what you are looking for, either plain text or a regular expression, depending on the mode diff --git a/src/calibre/gui2/dialogs/scheduler.py b/src/calibre/gui2/dialogs/scheduler.py index 23d6bbcef4..d57d514d54 100644 --- a/src/calibre/gui2/dialogs/scheduler.py +++ b/src/calibre/gui2/dialogs/scheduler.py @@ -214,6 +214,9 @@ class SchedulerDialog(QDialog, Ui_Dialog): self.recipes.setModel(self.recipe_model) self.detail_box.setVisible(False) + self.download_button = self.buttonBox.addButton(_('&Download now'), + self.buttonBox.ActionRole) + self.download_button.setIcon(QIcon(I('arrow-down.png'))) self.download_button.setVisible(False) self.recipes.currentChanged = self.current_changed for b, c in self.SCHEDULE_TYPES.iteritems(): @@ -371,7 +374,8 @@ class SchedulerDialog(QDialog, Ui_Dialog): '''%dict(title=recipe.get('title'), cb=_('Created by: '), author=recipe.get('author', _('Unknown')), description=recipe.get('description', ''))) - + self.download_button.setToolTip( + _('Download %s now')%recipe.get('title')) scheduled = schedule_info is not None self.schedule.setChecked(scheduled) self.toggle_schedule_info() @@ -458,7 +462,7 @@ class Scheduler(QObject): delta = timedelta(days=self.oldest) try: ids = list(self.db.tags_older_than(_('News'), - delta)) + delta, must_have_authors=['calibre'])) except: # Happens if library is being switched ids = [] diff --git a/src/calibre/gui2/dialogs/scheduler.ui b/src/calibre/gui2/dialogs/scheduler.ui index 6acbb01dd8..5198775bf0 100644 --- a/src/calibre/gui2/dialogs/scheduler.ui +++ b/src/calibre/gui2/dialogs/scheduler.ui @@ -53,8 +53,8 @@ 0 0 - 524 - 504 + 518 + 498 @@ -318,13 +318,6 @@
- - - - &Download now - - - @@ -369,7 +362,7 @@ - &Delete downloaded news older than: + Delete downloaded news &older than: old_news diff --git a/src/calibre/gui2/dialogs/tag_categories.ui b/src/calibre/gui2/dialogs/tag_categories.ui index e6fedf9bde..5b67c42a8a 100644 --- a/src/calibre/gui2/dialogs/tag_categories.ui +++ b/src/calibre/gui2/dialogs/tag_categories.ui @@ -118,7 +118,7 @@ - Rename the current category to the what is in the box + Rename the current category to what is in the box ... diff --git a/src/calibre/gui2/dialogs/tag_editor.py b/src/calibre/gui2/dialogs/tag_editor.py index bf3bb9fd4e..843711713b 100644 --- a/src/calibre/gui2/dialogs/tag_editor.py +++ b/src/calibre/gui2/dialogs/tag_editor.py @@ -10,19 +10,26 @@ from calibre.utils.icu import sort_key class TagEditor(QDialog, Ui_TagEditor): - def __init__(self, window, db, id_=None): + def __init__(self, window, db, id_=None, key=None): QDialog.__init__(self, window) Ui_TagEditor.__init__(self) self.setupUi(self) self.db = db + if key: + key = db.field_metadata.key_to_label(key) + self.key = key self.index = db.row(id_) if id_ is not None else None if self.index is not None: - tags = self.db.tags(self.index) + if key is None: + tags = self.db.tags(self.index) + if tags: + tags = [tag.strip() for tag in tags.split(',') if tag.strip()] + else: + tags = self.db.get_custom(self.index, label=key) else: tags = [] if tags: - tags = [tag.strip() for tag in tags.split(',') if tag.strip()] tags.sort(key=sort_key) for tag in tags: self.applied_tags.addItem(tag) @@ -31,7 +38,10 @@ class TagEditor(QDialog, Ui_TagEditor): self.tags = tags - all_tags = [tag for tag in self.db.all_tags()] + if key: + all_tags = [tag for tag in self.db.all_custom(label=key)] + else: + all_tags = [tag for tag in self.db.all_tags()] all_tags = list(set(all_tags)) all_tags.sort(key=sort_key) for tag in all_tags: @@ -61,7 +71,10 @@ class TagEditor(QDialog, Ui_TagEditor): error_dialog(self, 'No tags selected', 'You must select at least one tag from the list of Available tags.').exec_() return for item in items: - if self.db.is_tag_used(unicode(item.text())): + used = self.db.is_tag_used(unicode(item.text())) \ + if self.key is None else \ + self.db.is_item_used_in_multiple(unicode(item.text()), label=self.key) + if used: confirms.append(item) else: deletes.append(item) @@ -73,7 +86,12 @@ class TagEditor(QDialog, Ui_TagEditor): deletes += confirms for item in deletes: - self.db.delete_tag(unicode(item.text())) + if self.key is None: + self.db.delete_tag(unicode(item.text())) + else: + bks = self.db.delete_item_from_multiple(unicode(item.text()), + label=self.key) + self.db.refresh_ids(bks) self.available_tags.takeItem(self.available_tags.row(item)) diff --git a/src/calibre/gui2/dialogs/template_dialog.ui b/src/calibre/gui2/dialogs/template_dialog.ui index 9037e70b94..0acfc0f0f8 100644 --- a/src/calibre/gui2/dialogs/template_dialog.ui +++ b/src/calibre/gui2/dialogs/template_dialog.ui @@ -77,7 +77,7 @@ template_value - The value the of the template using the current book in the library view + The value of the template using the current book in the library view diff --git a/src/calibre/gui2/dialogs/tweak_epub.py b/src/calibre/gui2/dialogs/tweak_epub.py index e0be9fa1e9..503b1f45d3 100755 --- a/src/calibre/gui2/dialogs/tweak_epub.py +++ b/src/calibre/gui2/dialogs/tweak_epub.py @@ -7,6 +7,7 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' import os, shutil +from itertools import repeat, izip from calibre.utils.zipfile import ZipFile, 
ZIP_DEFLATED, ZIP_STORED from PyQt4.Qt import QDialog @@ -30,9 +31,20 @@ class TweakEpub(QDialog, Ui_Dialog): self._epub = epub self._exploded = None self._output = None + self.ishtmlz = epub.lower().endswith('.htmlz') + self.rebuilt_name = 'rebuilt.' + ('htmlz' if self.ishtmlz else 'epub') # Run the dialog setup generated from tweak_epub.ui self.setupUi(self) + for x, props in [(self, ['windowTitle']), (self.label, ['text'])]+\ + list(izip([self.cancel_button, self.explode_button, + self.rebuild_button, self.preview_button], + repeat(['text', 'statusTip', 'toolTip']))): + for prop in props: + val = unicode(getattr(x, prop)()) + val = val.format('HTMLZ' if self.ishtmlz else 'ePub') + prop = 'set' + prop[0].upper() + prop[1:] + getattr(x, prop)(val) self.cancel_button.clicked.connect(self.reject) self.explode_button.clicked.connect(self.explode) @@ -83,9 +95,11 @@ class TweakEpub(QDialog, Ui_Dialog): def do_rebuild(self, src): with ZipFile(src, 'w', compression=ZIP_DEFLATED) as zf: # Write mimetype - zf.write(os.path.join(self._exploded,'mimetype'), 'mimetype', compress_type=ZIP_STORED) + mt = os.path.join(self._exploded, 'mimetype') + if os.path.exists(mt): + zf.write(mt, 'mimetype', compress_type=ZIP_STORED) # Write everything else - exclude_files = ['.DS_Store','mimetype','iTunesMetadata.plist','rebuilt.epub'] + exclude_files = ['.DS_Store','mimetype','iTunesMetadata.plist',self.rebuilt_name] for root, dirs, files in os.walk(self._exploded): for fn in files: if fn in exclude_files: @@ -97,11 +111,11 @@ class TweakEpub(QDialog, Ui_Dialog): def preview(self): if not self._exploded: - return error_dialog(self, _('Cannot preview'), - _('You must first explode the epub before previewing.'), - show=True) + msg = _('You must first explode the %s before previewing.') + msg = msg%('HTMLZ' if self.ishtmlz else 'ePub') + return error_dialog(self, _('Cannot preview'), msg, show=True) - tf = PersistentTemporaryFile('.epub') + tf = PersistentTemporaryFile('.htmlz' if self.ishtmlz else '.epub') tf.close() self._preview_files.append(tf.name) @@ -110,7 +124,7 @@ class TweakEpub(QDialog, Ui_Dialog): self.gui.iactions['View']._view_file(tf.name) def rebuild(self, *args): - self._output = os.path.join(self._exploded, 'rebuilt.epub') + self._output = os.path.join(self._exploded, self.rebuilt_name) self.do_rebuild(self._output) return QDialog.accept(self) diff --git a/src/calibre/gui2/dialogs/tweak_epub.ui b/src/calibre/gui2/dialogs/tweak_epub.ui index a59af4fde1..9f14a1b275 100644 --- a/src/calibre/gui2/dialogs/tweak_epub.ui +++ b/src/calibre/gui2/dialogs/tweak_epub.ui @@ -14,7 +14,7 @@ - Tweak ePub + Tweak {0} false @@ -26,7 +26,7 @@ - <p>Explode the ePub to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. When tweaks are complete, close the file browser window <b>and the editor windows you used to edit files in the epub</b>.</p><p>Rebuild the ePub, updating your calibre library.</p> + <p>Explode the {0} to display contents in a file browser window. To tweak individual files, right-click, then 'Open with...' your editor of choice. 
When tweaks are complete, close the file browser window <b>and the editor windows you used to edit files in the ePub</b>.</p><p>Rebuild the ePub, updating your calibre library.</p> true @@ -35,11 +35,14 @@ + + Display contents of exploded {0} + - Display contents of exploded ePub + Display contents of exploded {0} - &Explode ePub + &Explode {0} @@ -49,6 +52,9 @@ + + Discard changes + Discard changes @@ -66,11 +72,14 @@ false + + Rebuild {0} from exploded contents + - Rebuild ePub from exploded contents + Rebuild {0} from exploded contents - &Rebuild ePub + &Rebuild {0} @@ -81,7 +90,7 @@ - &Preview ePub + &Preview {0} diff --git a/src/calibre/gui2/email.py b/src/calibre/gui2/email.py index 0879823574..d505baecc1 100644 --- a/src/calibre/gui2/email.py +++ b/src/calibre/gui2/email.py @@ -157,6 +157,9 @@ def email_news(mi, remove, get_fmts, done, job_manager): sent_mails.append(to_s[0]) return sent_mails +plugboard_email_value = 'email' +plugboard_email_formats = ['epub', 'mobi'] + class EmailMixin(object): # {{{ def send_by_mail(self, to, fmts, delete_from_library, subject='', send_ids=None, @@ -164,10 +167,13 @@ class EmailMixin(object): # {{{ ids = [self.library_view.model().id(r) for r in self.library_view.selectionModel().selectedRows()] if send_ids is None else send_ids if not ids or len(ids) == 0: return + files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(ids, fmts, set_metadata=True, specific_format=specific_format, - exclude_auto=do_auto_convert) + exclude_auto=do_auto_convert, + use_plugboard=plugboard_email_value, + plugboard_formats=plugboard_email_formats) if do_auto_convert: nids = list(set(ids).difference(_auto_ids)) ids = [i for i in ids if i in nids] diff --git a/src/calibre/gui2/init.py b/src/calibre/gui2/init.py index cccf76e713..95e8c79382 100644 --- a/src/calibre/gui2/init.py +++ b/src/calibre/gui2/init.py @@ -5,14 +5,14 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import functools, sys, os +import functools from PyQt4.Qt import Qt, QStackedWidget, QMenu, \ QSize, QSizePolicy, QStatusBar, QLabel, QFont from calibre.utils.config import prefs -from calibre.constants import isosx, __appname__, preferred_encoding, \ - __version__ +from calibre.constants import (isosx, __appname__, preferred_encoding, + get_version) from calibre.gui2 import config, is_widescreen, gprefs from calibre.gui2.library.views import BooksView, DeviceBooksView from calibre.gui2.widgets import Splitter @@ -187,11 +187,7 @@ class StatusBar(QStatusBar): # {{{ self.clearMessage() def get_version(self): - dv = os.environ.get('CALIBRE_DEVELOP_FROM', None) - v = __version__ - if getattr(sys, 'frozen', False) and dv and os.path.abspath(dv) in sys.path: - v += '*' - return v + return get_version() def show_message(self, msg, timeout=0): self.showMessage(msg, timeout) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 64d9eda301..d10ee4d633 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -22,6 +22,7 @@ from calibre.utils.icu import sort_key from calibre.utils.search_query_parser import SearchQueryParser from calibre.library.caches import (_match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH, MetadataBackup, force_to_bool) +from calibre.library.save_to_disk import find_plugboard from calibre import strftime, isbytestring from calibre.constants import filesystem_encoding, DEBUG from calibre.gui2.library import DEFAULT_SORT @@ -429,7 +430,8 @@ class 
BooksModel(QAbstractTableModel): # {{{ def get_preferred_formats_from_ids(self, ids, formats, set_metadata=False, specific_format=None, - exclude_auto=False, mode='r+b'): + exclude_auto=False, mode='r+b', + use_plugboard=None, plugboard_formats=None): from calibre.ebooks.metadata.meta import set_metadata as _set_metadata ans = [] need_auto = [] @@ -453,9 +455,21 @@ class BooksModel(QAbstractTableModel): # {{{ pt.seek(0) if set_metadata: try: - _set_metadata(pt, self.db.get_metadata( - id, get_cover=True, index_is_id=True, - cover_as_data=True), format) + mi = self.db.get_metadata(id, get_cover=True, + index_is_id=True, + cover_as_data=True) + newmi = None + if use_plugboard and format.lower() in plugboard_formats: + plugboards = self.db.prefs.get('plugboards', {}) + cpb = find_plugboard(use_plugboard, format.lower(), + plugboards) + if cpb: + newmi = mi.deepcopy_metadata() + newmi.template_to_attribute(mi, cpb) + if newmi is not None: + _set_metadata(pt, newmi, format) + else: + _set_metadata(pt, mi, format) except: traceback.print_exc() pt.close() diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index 3244a35545..cb73b5ddf0 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -51,8 +51,7 @@ class PreserveViewState(object): # {{{ traceback.print_exc() def __exit__(self, *args): - current = self.view.get_selected_ids() - if not current and self.selected_ids: + if self.selected_ids: if self.current_id is not None: self.view.current_id = self.current_id self.view.select_rows(self.selected_ids, using_ids=True, diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 539110fb92..9e1b6b4b24 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -143,12 +143,12 @@ class GuiRunner(QObject): add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False) sys.excepthook = main.unhandled_exception if len(self.args) > 1: - p = os.path.abspath(self.args[1]) - if os.path.isdir(p): - prints('Ignoring directory passed as command line argument:', - self.args[1]) - else: - add_filesystem_book(p) + files = [os.path.abspath(p) for p in self.args[1:] if not + os.path.isdir(p)] + if len(files) < len(sys.argv[1:]): + prints('Ignoring directories passed as command line arguments') + if files: + add_filesystem_book(files) self.app.file_event_hook = add_filesystem_book self.main = main diff --git a/src/calibre/gui2/preferences/behavior.py b/src/calibre/gui2/preferences/behavior.py index 1247c54ec9..b5070cbdea 100644 --- a/src/calibre/gui2/preferences/behavior.py +++ b/src/calibre/gui2/preferences/behavior.py @@ -162,6 +162,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): for key in dynamic.keys(): if key.endswith('_again') and dynamic[key] is False: dynamic[key] = True + gprefs['questions_to_auto_skip'] = [] info_dialog(self, _('Done'), _('Confirmation dialogs have all been reset'), show=True) diff --git a/src/calibre/gui2/preferences/conversion.py b/src/calibre/gui2/preferences/conversion.py index b5240227d3..a15ee0a750 100644 --- a/src/calibre/gui2/preferences/conversion.py +++ b/src/calibre/gui2/preferences/conversion.py @@ -117,6 +117,6 @@ if __name__ == '__main__': from PyQt4.Qt import QApplication app = QApplication([]) #test_widget('Conversion', 'Input Options') - test_widget('Conversion', 'Common Options') - #test_widget('Conversion', 'Output Options') + #test_widget('Conversion', 'Common Options') + test_widget('Conversion', 'Output Options') diff --git 
a/src/calibre/gui2/preferences/plugboard.py b/src/calibre/gui2/preferences/plugboard.py index 587db0fd79..78a641ea61 100644 --- a/src/calibre/gui2/preferences/plugboard.py +++ b/src/calibre/gui2/preferences/plugboard.py @@ -21,6 +21,7 @@ from calibre.library.save_to_disk import plugboard_any_format_value, \ find_plugboard from calibre.library.server.content import plugboard_content_server_value, \ plugboard_content_server_formats +from calibre.gui2.email import plugboard_email_value, plugboard_email_formats from calibre.utils.formatter import validation_formatter @@ -68,6 +69,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.devices.insert(1, plugboard_content_server_value) self.device_to_formats_map[plugboard_content_server_value] = \ plugboard_content_server_formats + self.devices.insert(1, plugboard_email_value) + self.device_to_formats_map[plugboard_email_value] = \ + plugboard_email_formats self.devices.insert(1, plugboard_any_device_value) self.new_device.addItems(self.devices) @@ -321,8 +325,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.clear_fields(new_boxes=True) self.edit_format.clear() self.edit_format.addItem('') - for format in self.current_plugboards: - self.edit_format.addItem(format) + for format_ in self.current_plugboards: + self.edit_format.addItem(format_) self.edit_format.setCurrentIndex(0) self.edit_device.clear() self.ok_button.setEnabled(False) diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py index 20507b4ce1..70a8a82311 100644 --- a/src/calibre/gui2/preferences/plugins.py +++ b/src/calibre/gui2/preferences/plugins.py @@ -20,6 +20,7 @@ from calibre.gui2 import (NONE, error_dialog, info_dialog, choose_files, question_dialog, gprefs) from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.icu import lower +from calibre.constants import iswindows class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{ @@ -272,8 +273,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.modify_plugin(op='remove') def add_plugin(self): + info = '' if iswindows else ' [.zip %s]'%_('files') path = choose_files(self, 'add a plugin dialog', _('Add plugin'), - filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False, + filters=[(_('Plugins') + info, ['zip'])], all_files=False, select_only_single_file=True) if not path: return diff --git a/src/calibre/gui2/preferences/server.py b/src/calibre/gui2/preferences/server.py index f4a00c0932..16f2eb7316 100644 --- a/src/calibre/gui2/preferences/server.py +++ b/src/calibre/gui2/preferences/server.py @@ -63,16 +63,21 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): def start_server(self): ConfigWidgetBase.commit(self) - self.gui.start_content_server(check_started=False) - while not self.gui.content_server.is_running and self.gui.content_server.exception is None: - time.sleep(1) - if self.gui.content_server.exception is not None: - error_dialog(self, _('Failed to start content server'), - as_unicode(self.gui.content_server.exception)).exec_() - return - self.start_button.setEnabled(False) - self.test_button.setEnabled(True) - self.stop_button.setEnabled(True) + self.setCursor(Qt.BusyCursor) + try: + self.gui.start_content_server(check_started=False) + while (not self.gui.content_server.is_running and + self.gui.content_server.exception is None): + time.sleep(0.1) + if self.gui.content_server.exception is not None: + error_dialog(self, _('Failed to start content server'), + as_unicode(self.gui.content_server.exception)).exec_() + return + 
self.start_button.setEnabled(False) + self.test_button.setEnabled(True) + self.stop_button.setEnabled(True) + finally: + self.unsetCursor() def stop_server(self): self.gui.content_server.threaded_exit() diff --git a/src/calibre/gui2/preferences/server.ui b/src/calibre/gui2/preferences/server.ui index be68c2448f..4c8691500c 100644 --- a/src/calibre/gui2/preferences/server.ui +++ b/src/calibre/gui2/preferences/server.ui @@ -26,7 +26,7 @@ - + 65535 @@ -46,7 +46,7 @@ - + @@ -62,11 +62,16 @@ - If you leave the password blank, anyone will be able to access your book collection using the web interface. + <p>If you leave the password blank, anyone will be able to + access your book collection using the web interface. +<br> +<p>Note that passwords do not work with Android devices. +Leave this blank if you intend to use the server with an + Android phone or tablet. - + The maximum size (widthxheight) for displayed covers. Larger covers are resized. @@ -86,7 +91,7 @@ - + &Show password @@ -103,7 +108,7 @@ - + 10 @@ -113,7 +118,7 @@ - + 25 @@ -140,7 +145,7 @@ - + This restriction (based on a saved search) will restrict the books the content server makes available to those matching the search. This setting is per library (i.e. you can have a different restriction per library). @@ -153,6 +158,29 @@ + + + + + 50 + false + + + + <p>Because of a bug in Google's Android, setting a password + will prevent the server from working with Android devices. +<br> +<p>Do not set a password if you plan to use the server with an + Android phone or tablet. + + + QLabel {color:red} + + + Password incompatible with Android devices + + + diff --git a/src/calibre/gui2/preferences/toolbar.py b/src/calibre/gui2/preferences/toolbar.py index a173b53ed5..bae7a83e0b 100644 --- a/src/calibre/gui2/preferences/toolbar.py +++ b/src/calibre/gui2/preferences/toolbar.py @@ -17,10 +17,10 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget class FakeAction(object): - def __init__(self, name, icon, tooltip=None, + def __init__(self, name, gui_name, icon, tooltip=None, dont_add_to=frozenset([]), dont_remove_from=frozenset([])): self.name = name - self.action_spec = (name, icon, tooltip, None) + self.action_spec = (gui_name, icon, tooltip, None) self.dont_remove_from = dont_remove_from self.dont_add_to = dont_add_to @@ -28,17 +28,18 @@ class BaseModel(QAbstractListModel): def name_to_action(self, name, gui): if name == 'Donate': - return FakeAction(_('Donate'), 'donate.png', + return FakeAction('Donate', _('Donate'), 'donate.png', dont_add_to=frozenset(['context-menu', 'context-menu-device'])) if name == 'Location Manager': - return FakeAction(_('Location Manager'), None, + return FakeAction('Location Manager', _('Location Manager'), 'reader.png', _('Switch between library and device views'), dont_add_to=frozenset(['menubar', 'toolbar', 'toolbar-child', 'context-menu', 'context-menu-device'])) if name is None: - return FakeAction('--- '+_('Separator')+' ---', None, + return FakeAction('--- '+('Separator')+' ---', + '--- '+_('Separator')+' ---', None, dont_add_to=frozenset(['menubar', 'menubar-device'])) try: return gui.iactions[name] @@ -314,7 +315,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): if not pref_in_toolbar and not pref_in_menubar: self.models['menubar'][1].add(['Preferences']) if not lm_in_toolbar and not lm_in_menubar: - self.models['menubar-device'][1].add(['Location Manager']) + m = self.models['toolbar-device'][1] + m.add(['Location Manager']) + m.move(m.index(m.rowCount(None)-1), 5-m.rowCount(None)) # 
Save data. for am, cm in self.models.values(): diff --git a/src/calibre/gui2/shortcuts.py b/src/calibre/gui2/shortcuts.py index 5cdaf2da8a..a245db9f03 100644 --- a/src/calibre/gui2/shortcuts.py +++ b/src/calibre/gui2/shortcuts.py @@ -260,11 +260,11 @@ class ShortcutConfig(QWidget): self.view.setModel(model) self.delegate = Delegate() self.view.setItemDelegate(self.delegate) - self.delegate.sizeHintChanged.connect(self.scrollTo) + self.delegate.sizeHintChanged.connect(self.scrollTo, + type=Qt.QueuedConnection) def scrollTo(self, index): - self.view.scrollTo(index) - + self.view.scrollTo(index, self.view.EnsureVisible) @property def is_editing(self): diff --git a/src/calibre/gui2/tag_browser/view.py b/src/calibre/gui2/tag_browser/view.py index 4559eedc0d..98dbd7f51a 100644 --- a/src/calibre/gui2/tag_browser/view.py +++ b/src/calibre/gui2/tag_browser/view.py @@ -373,7 +373,7 @@ class TagsView(QTreeView): # {{{ add_node_tree(tree_dict[k], tm, p) p.pop() add_node_tree(nt, m, []) - elif key == 'search': + elif key == 'search' and tag.is_searchable: self.context_menu.addAction(self.rename_icon, _('Rename %s')%display_name(tag), partial(self.context_menu_handler, action='edit_item', @@ -381,7 +381,7 @@ class TagsView(QTreeView): # {{{ self.context_menu.addAction(self.delete_icon, _('Delete search %s')%display_name(tag), partial(self.context_menu_handler, - action='delete_search', key=tag.name)) + action='delete_search', key=tag.original_name)) if key.startswith('@') and not item.is_gst: self.context_menu.addAction(self.user_category_icon, _('Remove %(item)s from category %(cat)s')% @@ -389,17 +389,18 @@ class TagsView(QTreeView): # {{{ partial(self.context_menu_handler, action='delete_item_from_user_category', key = key, index = tag_item)) - # Add the search for value items. All leaf nodes are searchable - self.context_menu.addAction(self.search_icon, - _('Search for %s')%display_name(tag), - partial(self.context_menu_handler, action='search', - search_state=TAG_SEARCH_STATES['mark_plus'], - index=index)) - self.context_menu.addAction(self.search_icon, - _('Search for everything but %s')%display_name(tag), - partial(self.context_menu_handler, action='search', - search_state=TAG_SEARCH_STATES['mark_minus'], - index=index)) + if tag.is_searchable: + # Add the search for value items. 
All leaf nodes are searchable + self.context_menu.addAction(self.search_icon, + _('Search for %s')%display_name(tag), + partial(self.context_menu_handler, action='search', + search_state=TAG_SEARCH_STATES['mark_plus'], + index=index)) + self.context_menu.addAction(self.search_icon, + _('Search for everything but %s')%display_name(tag), + partial(self.context_menu_handler, action='search', + search_state=TAG_SEARCH_STATES['mark_minus'], + index=index)) self.context_menu.addSeparator() elif key.startswith('@') and not item.is_gst: if item.can_be_edited: diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 16fdb2e155..800eeacdc8 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -368,9 +368,14 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ self.library_view.model().db, server_config().parse()) self.content_server.state_callback = Dispatcher( self.iactions['Connect Share'].content_server_state_changed) - self.content_server.state_callback(True) if check_started: - QTimer.singleShot(10000, self.test_server) + self.content_server.start_failure_callback = \ + Dispatcher(self.content_server_start_failed) + + def content_server_start_failed(self, msg): + error_dialog(self, _('Failed to start Content Server'), + _('Could not start the content server. Error:\n\n%s')%msg, + show=True) def resizeEvent(self, ev): MainWindow.resizeEvent(self, ev) @@ -407,11 +412,14 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ return getattr(self, '__systray_minimized', False) def ask_a_yes_no_question(self, title, msg, det_msg='', - show_copy_button=False, ans_when_user_unavailable=True): + show_copy_button=False, ans_when_user_unavailable=True, + skip_dialog_name=None, skipped_value=True): if self.is_minimized_to_tray: return ans_when_user_unavailable return question_dialog(self, title, msg, det_msg=det_msg, - show_copy_button=show_copy_button) + show_copy_button=show_copy_button, + skip_dialog_name=skip_dialog_name, + skip_dialog_skipped_value=skipped_value) def hide_windows(self): for window in QApplication.topLevelWidgets(): diff --git a/src/calibre/gui2/viewer/config.ui b/src/calibre/gui2/viewer/config.ui index 6e37170154..fd43cd79ad 100644 --- a/src/calibre/gui2/viewer/config.ui +++ b/src/calibre/gui2/viewer/config.ui @@ -7,7 +7,7 @@ 0 0 479 - 591 + 630 @@ -277,6 +277,27 @@ + + + + Font &magnification step size: + + + opt_font_mag_step + + + + + + + The amount by which the font size is increased/decreased + when you click the font size larger/smaller buttons + + + % + + + diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 170fb936bd..9009182cb5 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' # Imports {{{ -import os, math, re, glob, sys +import os, math, re, glob, sys, zipfile from base64 import b64encode from functools import partial @@ -12,24 +12,21 @@ from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, QPainter, QPalette, QBrush, QFontDatabase, QDialog, QColor, QPoint, QImage, QRegion, QVariant, QIcon, QFont, pyqtSignature, QAction, QByteArray, QMenu, - pyqtSignal) + pyqtSignal, QSwipeGesture) from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from calibre.utils.config import Config, StringConfig from calibre.utils.localization import get_language from calibre.gui2.viewer.config_ui import 
Ui_Dialog from calibre.gui2.viewer.flip import SlideFlip -from calibre.gui2.viewer.gestures import Gestures from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig from calibre.constants import iswindows from calibre import prints, guess_type from calibre.gui2.viewer.keys import SHORTCUTS +from calibre.gui2.viewer.javascript import JavaScriptLoader # }}} -bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = \ - hyphenator = images = hyphen_pats = None - def load_builtin_fonts(): base = P('fonts/liberation/*.ttf') for f in glob.glob(base): @@ -62,6 +59,10 @@ def config(defaults=None): c.add_opt('page_flip_duration', default=0.5, help=_('The time, in seconds, for the page flip animation. Default' ' is half a second.')) + c.add_opt('font_magnification_step', default=0.2, + help=_('The amount by which to change the font size when clicking' + ' the font larger/smaller buttons. Should be a number between ' + '0 and 1.')) fonts = c.add_group('FONTS', _('Font options')) fonts('serif_family', default='Times New Roman' if iswindows else 'Liberation Serif', @@ -87,6 +88,10 @@ class ConfigDialog(QDialog, Ui_Dialog): self.opt_remember_current_page.setChecked(opts.remember_current_page) self.opt_wheel_flips_pages.setChecked(opts.wheel_flips_pages) self.opt_page_flip_duration.setValue(opts.page_flip_duration) + fms = opts.font_magnification_step + if fms < 0.01 or fms > 1: + fms = 0.2 + self.opt_font_mag_step.setValue(int(fms*100)) self.serif_family.setCurrentFont(QFont(opts.serif_family)) self.sans_family.setCurrentFont(QFont(opts.sans_family)) self.mono_family.setCurrentFont(QFont(opts.mono_family)) @@ -96,9 +101,9 @@ class ConfigDialog(QDialog, Ui_Dialog): self.css.setPlainText(opts.user_css) self.css.setToolTip(_('Set the user CSS stylesheet. This can be used to customize the look of all books.')) self.max_view_width.setValue(opts.max_view_width) - pats = [os.path.basename(x).split('.')[0].replace('-', '_') for x in - glob.glob(P('viewer/hyphenate/patterns/*.js', - allow_user_override=False))] + with zipfile.ZipFile(P('viewer/hyphenate/patterns.zip', + allow_user_override=False), 'r') as zf: + pats = [x.split('.')[0].replace('-', '_') for x in zf.namelist()] names = list(map(get_language, pats)) pmap = {} for i in range(len(pats)): @@ -143,6 +148,8 @@ class ConfigDialog(QDialog, Ui_Dialog): c.set('remember_current_page', self.opt_remember_current_page.isChecked()) c.set('wheel_flips_pages', self.opt_wheel_flips_pages.isChecked()) c.set('page_flip_duration', self.opt_page_flip_duration.value()) + c.set('font_magnification_step', + float(self.opt_font_mag_step.value())/100.) 
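A small sketch, assuming only the option name from the hunks above, of how font_magnification_step round-trips between the stored fraction and the percentage spin box in the viewer configuration dialog:

def step_to_percent(fms, default=0.2):
    # Out-of-range values fall back to the default, mirroring the
    # fms < 0.01 or fms > 1 check above.
    if fms < 0.01 or fms > 1:
        fms = default
    return int(fms * 100)

def percent_to_step(value):
    # Reverse conversion applied when the dialog is saved.
    return float(value) / 100.

assert percent_to_step(step_to_percent(0.2)) == 0.2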
idx = self.hyphenate_default_lang.currentIndex() c.set('hyphenate_default_lang', str(self.hyphenate_default_lang.itemData(idx).toString())) @@ -172,13 +179,16 @@ class Document(QWebPage): # {{{ self.misc_config() self.after_load() - def __init__(self, shortcuts, parent=None, resize_callback=lambda: None): + def __init__(self, shortcuts, parent=None, resize_callback=lambda: None, + debug_javascript=False): QWebPage.__init__(self, parent) self.setObjectName("py_bridge") - self.debug_javascript = False + self.debug_javascript = debug_javascript self.resize_callback = resize_callback self.current_language = None self.loaded_javascript = False + self.js_loader = JavaScriptLoader( + dynamic_coffeescript=self.debug_javascript) self.setLinkDelegationPolicy(self.DelegateAllLinks) self.scroll_marks = [] @@ -223,6 +233,7 @@ class Document(QWebPage): # {{{ self.do_fit_images = opts.fit_images self.page_flip_duration = opts.page_flip_duration self.enable_page_flip = self.page_flip_duration > 0.1 + self.font_magnification_step = opts.font_magnification_step self.wheel_flips_pages = opts.wheel_flips_pages def fit_images(self): @@ -234,8 +245,6 @@ class Document(QWebPage): # {{{ self.loaded_javascript = False def load_javascript_libraries(self): - global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, \ - hyphenator, images, hyphen_pats if self.loaded_javascript: return self.loaded_javascript = True @@ -245,48 +254,8 @@ class Document(QWebPage): # {{{ window.py_bridge.window_resized(); } ''') - if jquery is None: - jquery = P('content_server/jquery.js', data=True) - self.javascript(jquery) - if jquery_scrollTo is None: - jquery_scrollTo = P('viewer/jquery_scrollTo.js', data=True) - self.javascript(jquery_scrollTo) - if bookmarks is None: - bookmarks = P('viewer/bookmarks.js', data=True) - self.javascript(bookmarks) - if referencing is None: - referencing = P('viewer/referencing.js', data=True) - self.javascript(referencing) - if images is None: - images = P('viewer/images.js', data=True) - self.javascript(images) - if hyphenation is None: - hyphenation = P('viewer/hyphenation.js', data=True) - self.javascript(hyphenation) - default_lang = self.hyphenate_default_lang - lang = self.current_language - if not lang: - lang = default_lang - def lang_name(l): - if l == 'en': - l = 'en-us' - return l.lower().replace('_', '-') - if hyphenator is None: - hyphenator = P('viewer/hyphenate/Hyphenator.js', data=True).decode('utf-8') - if hyphen_pats is None: - hyphen_pats = [] - for x in glob.glob(P('viewer/hyphenate/patterns/*.js', - allow_user_override=False)): - with open(x, 'rb') as f: - hyphen_pats.append(f.read().decode('utf-8')) - hyphen_pats = u'\n'.join(hyphen_pats) - - self.javascript(hyphenator+hyphen_pats) - p = P('viewer/hyphenate/patterns/%s.js'%lang_name(lang)) - if not os.path.exists(p): - lang = default_lang - p = P('viewer/hyphenate/patterns/%s.js'%lang_name(lang)) - self.loaded_lang = lang_name(lang) + self.loaded_lang = self.js_loader(self.mainFrame().evaluateJavaScript, + self.current_language, self.hyphenate_default_lang) @pyqtSignature("") def animated_scroll_done(self): @@ -500,12 +469,10 @@ class DocumentView(QWebView): # {{{ magnification_changed = pyqtSignal(object) DISABLED_BRUSH = QBrush(Qt.lightGray, Qt.Dense5Pattern) - def __init__(self, *args): - QWebView.__init__(self, *args) + def initialize_view(self, debug_javascript=False): self.flipper = SlideFlip(self) - self.gestures = Gestures() self.is_auto_repeat_event = False - self.debug_javascript = False + 
self.debug_javascript = debug_javascript self.shortcuts = Shortcuts(SHORTCUTS, 'shortcuts/viewer') self.self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>', re.IGNORECASE) @@ -514,7 +481,8 @@ class DocumentView(QWebView): # {{{ self.initial_pos = 0.0 self.to_bottom = False self.document = Document(self.shortcuts, parent=self, - resize_callback=self.viewport_resized) + resize_callback=self.viewport_resized, + debug_javascript=debug_javascript) self.setPage(self.document) self.manager = None self._reference_mode = False @@ -571,6 +539,7 @@ class DocumentView(QWebView): # {{{ else: m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0]) self.goto_location_action.setMenu(self.goto_location_menu) + self.grabGesture(Qt.SwipeGesture) def goto_next_section(self, *args): if self.manager is not None: @@ -932,13 +901,17 @@ class DocumentView(QWebView): # {{{ self.magnification_changed.emit(val) return property(fget=fget, fset=fset) - def magnify_fonts(self): - self.multiplier += 0.2 + def magnify_fonts(self, amount=None): + if amount is None: + amount = self.document.font_magnification_step + self.multiplier += amount return self.document.scroll_fraction - def shrink_fonts(self): - if self.multiplier >= 0.2: - self.multiplier -= 0.2 + def shrink_fonts(self, amount=None): + if amount is None: + amount = self.document.font_magnification_step + if self.multiplier >= amount: + self.multiplier -= amount return self.document.scroll_fraction def changeEvent(self, event): @@ -956,6 +929,11 @@ class DocumentView(QWebView): # {{{ painter.end() def wheelEvent(self, event): + mods = event.modifiers() + if mods & Qt.CTRL: + if self.manager is not None and event.delta() != 0: + (self.manager.font_size_larger if event.delta() > 0 else + self.manager.font_size_smaller)() if event.delta() < -14: if self.document.wheel_flips_pages: self.next_page() @@ -1027,28 +1005,24 @@ class DocumentView(QWebView): # {{{ self.manager.viewport_resized(self.scroll_fraction) def event(self, ev): - typ = ev.type() - if typ == ev.TouchBegin: - try: - self.gestures.start_gesture('touch', ev) - except: - import traceback - traceback.print_exc() - elif typ == ev.TouchEnd: - try: - gesture = self.gestures.end_gesture('touch', ev, self.rect()) - except: - import traceback - traceback.print_exc() - if gesture is not None: - ev.accept() - if gesture == 'lineleft': - self.next_page() - elif gesture == 'lineright': - self.previous_page() + if ev.type() == ev.Gesture: + swipe = ev.gesture(Qt.SwipeGesture) + if swipe is not None: + self.handle_swipe(swipe) return True return QWebView.event(self, ev) + def handle_swipe(self, swipe): + if swipe.state() == Qt.GestureFinished: + if swipe.horizontalDirection() == QSwipeGesture.Left: + self.previous_page() + elif swipe.horizontalDirection() == QSwipeGesture.Right: + self.next_page() + elif swipe.verticalDirection() == QSwipeGesture.Up: + self.goto_previous_section() + elif swipe.horizontalDirection() == QSwipeGesture.Down: + self.goto_next_section() + def mouseReleaseEvent(self, ev): opos = self.document.ypos ret = QWebView.mouseReleaseEvent(self, ev) diff --git a/src/calibre/gui2/viewer/gestures.py b/src/calibre/gui2/viewer/gestures.py deleted file mode 100644 index 86d2f842b9..0000000000 --- a/src/calibre/gui2/viewer/gestures.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -import time - -class Gestures(object): - - def 
__init__(self): - self.in_progress = {} - - def get_boundary_point(self, event): - t = time.time() - id_ = None - if hasattr(event, 'touchPoints'): - tps = list(event.touchPoints()) - tp = None - for t in tps: - if t.isPrimary(): - tp = t - break - if tp is None: - tp = tps[0] - gp, p = tp.screenPos(), tp.pos() - id_ = tp.id() - else: - gp, p = event.globalPos(), event.pos() - return (t, gp, p, id_) - - def start_gesture(self, typ, event): - self.in_progress[typ] = self.get_boundary_point(event) - - def is_in_progress(self, typ): - return typ in self.in_progress - - def end_gesture(self, typ, event, widget_rect): - if not self.is_in_progress(typ): - return - start = self.in_progress[typ] - end = self.get_boundary_point(event) - if start[3] != end[3]: - return - timespan = end[0] - start[0] - start_pos, end_pos = start[1], end[1] - xspan = end_pos.x() - start_pos.x() - yspan = end_pos.y() - start_pos.y() - - width = widget_rect.width() - - if timespan < 1.1 and abs(xspan) >= width/5. and \ - abs(yspan) < abs(xspan)/5.: - # Quick horizontal gesture - return 'line'+('left' if xspan < 0 else 'right') - - return None - - - diff --git a/src/calibre/gui2/viewer/javascript.py b/src/calibre/gui2/viewer/javascript.py new file mode 100644 index 0000000000..e092d55f14 --- /dev/null +++ b/src/calibre/gui2/viewer/javascript.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os, zipfile + +import calibre +from calibre.utils.localization import lang_as_iso639_1 + +class JavaScriptLoader(object): + + JS = {x:('viewer/%s.js'%x if y is None else y) for x, y in { + + 'bookmarks':None, + 'referencing':None, + 'hyphenation':None, + 'jquery':'content_server/jquery.js', + 'jquery_scrollTo':None, + 'hyphenator':'viewer/hyphenate/Hyphenator.js', + 'images':None + + }.iteritems()} + + CS = { + 'cfi':('ebooks/oeb/display/cfi.coffee', 'display/cfi.js'), + } + + ORDER = ('jquery', 'jquery_scrollTo', 'bookmarks', 'referencing', 'images', + 'hyphenation', 'hyphenator', 'cfi',) + + + def __init__(self, dynamic_coffeescript=False): + self._dynamic_coffeescript = dynamic_coffeescript + self._cache = {} + self._hp_cache = {} + + def get(self, name): + ans = self._cache.get(name, None) + if ans is None: + src = self.CS.get(name, None) + if src is None: + src = self.JS.get(name, None) + if src is None: + raise KeyError('No such resource: %s'%name) + ans = P(src, data=True, + allow_user_override=False).decode('utf-8') + else: + f = getattr(calibre, '__file__', None) + if self._dynamic_coffeescript and f and os.path.exists(f): + src = src[0] + src = os.path.join(os.path.dirname(f), *(src.split('/'))) + from calibre.utils.serve_coffee import compile_coffeescript + with open(src, 'rb') as f: + cs, errors = compile_coffeescript(f.read(), src) + if errors: + for line in errors: + print (line) + raise Exception('Failed to compile coffeescript' + ': %s'%src) + ans = cs + else: + ans = P(src[1], data=True, allow_user_override=False) + self._cache[name] = ans + return ans + + def __call__(self, evaljs, lang, default_lang): + for x in self.ORDER: + src = self.get(x) + evaljs(src) + + if not lang: + lang = 'en' + + def lang_name(l): + l = l.lower() + l = lang_as_iso639_1(l) + if not l: + l = 'en' + l = {'en':'en-us', 'nb':'nb-no', 'el':'el-monoton'}.get(l, l) + return l.lower().replace('_', '-') + 
+ if not self._hp_cache: + with zipfile.ZipFile(P('viewer/hyphenate/patterns.zip', + allow_user_override=False), 'r') as zf: + for pat in zf.namelist(): + raw = zf.read(pat).decode('utf-8') + self._hp_cache[pat.partition('.')[0]] = raw + + if lang_name(lang) not in self._hp_cache: + lang = lang_name(default_lang) + + lang = lang_name(lang) + + evaljs('\n\n'.join(self._hp_cache.itervalues())) + + return lang + diff --git a/src/calibre/gui2/viewer/keys.py b/src/calibre/gui2/viewer/keys.py index 5ca1802092..7d5884f624 100644 --- a/src/calibre/gui2/viewer/keys.py +++ b/src/calibre/gui2/viewer/keys.py @@ -11,7 +11,7 @@ SHORTCUTS = { 'Next Page' : (['PgDown', 'Space'], _('Scroll to the next page')), - 'Previous Page' : (['PgUp', 'Backspace'], + 'Previous Page' : (['PgUp', 'Backspace', 'Shift+Space'], _('Scroll to the previous page')), 'Next Section' : (['Ctrl+PgDown', 'Ctrl+Down'], diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index db6d0d8b91..2cf53824c4 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -175,6 +175,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): def __init__(self, pathtoebook=None, debug_javascript=False, open_at=None): MainWindow.__init__(self, None) self.setupUi(self) + self.view.initialize_view(debug_javascript) self.view.magnification_changed.connect(self.magnification_changed) self.show_toc_on_open = False self.current_book_has_toc = False @@ -215,7 +216,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.search.setMinimumWidth(200) self.tool_bar2.insertWidget(self.action_find_next, self.search) self.view.set_manager(self) - self.view.document.debug_javascript = debug_javascript self.pi = ProgressIndicator(self) self.toc.setVisible(False) self.action_quit = QAction(self) @@ -243,10 +243,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.connect(self.action_metadata, SIGNAL('triggered(bool)'), lambda x:self.metadata.setVisible(x)) self.action_table_of_contents.toggled[bool].connect(self.set_toc_visible) self.connect(self.action_copy, SIGNAL('triggered(bool)'), self.copy) - self.connect(self.action_font_size_larger, SIGNAL('triggered(bool)'), - self.font_size_larger) - self.connect(self.action_font_size_smaller, SIGNAL('triggered(bool)'), - self.font_size_smaller) + self.action_font_size_larger.triggered.connect(self.font_size_larger) + self.action_font_size_smaller.triggered.connect(self.font_size_smaller) self.connect(self.action_open_ebook, SIGNAL('triggered(bool)'), self.open_ebook) self.connect(self.action_next_page, SIGNAL('triggered(bool)'), @@ -482,13 +480,13 @@ class EbookViewer(MainWindow, Ui_EbookViewer): def open_recent(self, action): self.load_ebook(action.path) - def font_size_larger(self, checked): + def font_size_larger(self): frac = self.view.magnify_fonts() self.action_font_size_larger.setEnabled(self.view.multiplier < 3) self.action_font_size_smaller.setEnabled(self.view.multiplier > 0.2) self.set_page_number(frac) - def font_size_smaller(self, checked): + def font_size_smaller(self): frac = self.view.shrink_fonts() self.action_font_size_larger.setEnabled(self.view.multiplier < 3) self.action_font_size_smaller.setEnabled(self.view.multiplier > 0.2) diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index 09b655f758..f3badd91c9 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -660,7 +660,8 @@ class HistoryLineEdit(QComboBox): # {{{ def focusOutEvent(self, e): QComboBox.focusOutEvent(self, e) - self.lost_focus.emit() + if not 
(self.hasFocus() or self.view().hasFocus()): + self.lost_focus.emit() # }}} diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py index 841fc6181a..7ec7d6f31d 100644 --- a/src/calibre/gui2/wizard/__init__.py +++ b/src/calibre/gui2/wizard/__init__.py @@ -80,7 +80,7 @@ class Kindle(Device): output_profile = 'kindle' output_format = 'MOBI' - name = 'Kindle 1, 2, 3 or 4' + name = 'Kindle 1-4 and Touch' manufacturer = 'Amazon' id = 'kindle' @@ -122,7 +122,7 @@ class Sony505(Device): id = 'prs505' class Kobo(Device): - name = 'Kobo Reader' + name = 'Kobo and Kobo Touch Readers' manufacturer = 'Kobo' output_profile = 'kobo' output_format = 'EPUB' diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index e2552b3a82..f30e200296 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -666,8 +666,8 @@ class EPUB_MOBI(CatalogPlugin): # {{{ default=('~,'+_('Catalog')), dest='exclude_tags', action = None, - help=_("Comma-separated list of tag words indicating book should be excluded from output." - "For example: 'skip' will match 'skip this book' and 'Skip will like this'." + help=_("Comma-separated list of tag words indicating book should be excluded from output. " + "For example: 'skip' will match 'skip this book' and 'Skip will like this'. " "Default: '%default'\n" "Applies to: ePub, MOBI output formats")), Option('--generate-authors', diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index 9a2a27aecc..4c5ade37b0 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -276,6 +276,37 @@ class CustomColumns(object): self.conn.execute('DELETE FROM %s WHERE value=?'%lt, (id,)) self.conn.execute('DELETE FROM %s WHERE id=?'%table, (id,)) self.conn.commit() + + def is_item_used_in_multiple(self, item, label=None, num=None): + existing_tags = self.all_custom(label=label, num=num) + return item.lower() in {t.lower() for t in existing_tags} + + def delete_item_from_multiple(self, item, label=None, num=None): + if label is not None: + data = self.custom_column_label_map[label] + if num is not None: + data = self.custom_column_num_map[num] + if data['datatype'] != 'text' or not data['is_multiple']: + raise ValueError('Column %r is not text/multiple'%data['label']) + existing_tags = list(self.all_custom(label=label, num=num)) + lt = [t.lower() for t in existing_tags] + try: + idx = lt.index(item.lower()) + except ValueError: + idx = -1 + books_affected = [] + if idx > -1: + table, lt = self.custom_table_names(data['num']) + id_ = self.conn.get('SELECT id FROM %s WHERE value = ?'%table, + (existing_tags[idx],), all=False) + if id_: + books = self.conn.get('SELECT book FROM %s WHERE value = ?'%lt, (id_,)) + if books: + books_affected = [b[0] for b in books] + self.conn.execute('DELETE FROM %s WHERE value=?'%lt, (id_,)) + self.conn.execute('DELETE FROM %s WHERE id=?'%table, (id_,)) + self.conn.commit() + return books_affected # end convenience methods def get_next_cc_series_num_for(self, series, label=None, num=None): diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 02dda6622e..00ca0e39a2 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -40,6 +40,7 @@ from calibre.utils.magick.draw import save_cover_data_to from calibre.utils.recycle_bin import delete_file, delete_tree from calibre.utils.formatter_functions import load_user_template_functions from calibre.db.errors import 
NoSuchFormat +from calibre.db.lazy import FormatMetadata, FormatsList from calibre.utils.localization import (canonicalize_lang, calibre_langcode_to_name) @@ -81,7 +82,6 @@ class Tag(object): def __repr__(self): return str(self) - class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ''' An ebook metadata database that stores references to ebook files on disk. @@ -170,6 +170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): except: traceback.print_exc() self.field_metadata = FieldMetadata() + self.format_filename_cache = defaultdict(dict) self._library_id_ = None # Create the lock to be used to guard access to the metadata writer # queues. This must be an RLock, not a Lock @@ -310,6 +311,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if not self.is_second_db: load_user_template_functions(self.prefs.get('user_template_functions', [])) + # Load the format filename cache + self.format_filename_cache = defaultdict(dict) + for book_id, fmt, name in self.conn.get( + 'SELECT book,format,name FROM data'): + self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name + self.conn.executescript(''' DROP TRIGGER IF EXISTS author_insert_trg; CREATE TEMP TRIGGER author_insert_trg @@ -546,7 +553,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): authors = self.authors(id, index_is_id=True) if not authors: authors = _('Unknown') - author = ascii_filename(authors.split(',')[0] + author = ascii_filename(authors.split(',')[0].replace('|', ',') )[:self.PATH_LIMIT].decode('ascii', 'replace') title = ascii_filename(self.title(id, index_is_id=True) )[:self.PATH_LIMIT].decode('ascii', 'replace') @@ -565,7 +572,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): authors = self.authors(id, index_is_id=True) if not authors: authors = _('Unknown') - author = ascii_filename(authors.split(',')[0] + author = ascii_filename(authors.split(',')[0].replace('|', ',') )[:self.PATH_LIMIT].decode('ascii', 'replace') title = ascii_filename(self.title(id, index_is_id=True) )[:self.PATH_LIMIT].decode('ascii', 'replace') @@ -599,7 +606,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): fname = self.construct_file_name(id) changed = False for format in formats: - name = self.conn.get('SELECT name FROM data WHERE book=? 
AND format=?', (id, format), all=False) + name = self.format_filename_cache[id].get(format.upper(), None) if name and name != fname: changed = True break @@ -911,11 +918,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): aum = [] aus = {} aul = {} - for (author, author_sort, link) in aut_list: - aut = author.replace('|', ',') - aum.append(aut) - aus[aut] = author_sort.replace('|', ',') - aul[aut] = link + try: + for (author, author_sort, link) in aut_list: + aut = author.replace('|', ',') + aum.append(aut) + aus[aut] = author_sort.replace('|', ',') + aul[aut] = link + except ValueError: + # Author has either ::: or :#: in it + for x in row[fm['authors']].split(','): + aum.append(x.replace('|', ',')) + aul[aum[-1]] = '' + aus[aum[-1]] = aum[-1] mi.title = row[fm['title']] mi.authors = aum mi.author_sort = row[fm['author_sort']] @@ -937,14 +951,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): good_formats = None else: formats = sorted(formats.split(',')) - good_formats = [] - for f in formats: - try: - mi.format_metadata[f] = self.format_metadata(id, f) - except: - pass - else: - good_formats.append(f) + mi.format_metadata = FormatMetadata(self, id, formats) + good_formats = FormatsList(formats, mi.format_metadata) mi.formats = good_formats tags = row[fm['tags']] if tags: @@ -1138,12 +1146,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def format_files(self, index, index_is_id=False): id = index if index_is_id else self.id(index) - try: - formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,)) - formats = map(lambda x:(x[0], x[1]), formats) - return formats - except: - return [] + return [(v, k) for k, v in self.format_filename_cache[id].iteritems()] def formats(self, index, index_is_id=False, verify_formats=True): ''' Return available formats as a comma separated list or None if there are no available formats ''' @@ -1229,7 +1232,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ''' id = index if index_is_id else self.id(index) try: - name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False) + name = self.format_filename_cache[id][format.upper()] except: return None if name: @@ -1326,11 +1329,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True, replace=True): id = index if index_is_id else self.id(index) - if format: - self.format_metadata_cache[id].pop(format.upper(), None) + if not format: format = '' + self.format_metadata_cache[id].pop(format.upper(), None) + name = self.format_filename_cache[id].get(format.upper(), None) if path is None: path = os.path.join(self.library_path, self.path(id, index_is_id=True)) - name = self.conn.get('SELECT name FROM data WHERE book=? 
AND format=?', (id, format), all=False) if name and not replace: return False name = self.construct_file_name(id) @@ -1348,6 +1351,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', (id, format.upper(), size, name)) self.conn.commit() + self.format_filename_cache[id][format.upper()] = name self.refresh_ids([id]) if notify: self.notify('metadata', [id]) @@ -1395,9 +1399,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def remove_format(self, index, format, index_is_id=False, notify=True, commit=True, db_only=False): id = index if index_is_id else self.id(index) - if format: - self.format_metadata_cache[id].pop(format.upper(), None) - name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False) + if not format: format = '' + self.format_metadata_cache[id].pop(format.upper(), None) + name = self.format_filename_cache[id].pop(format.upper(), None) if name: if not db_only: try: @@ -1918,7 +1922,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ############# End get_categories - def tags_older_than(self, tag, delta, must_have_tag=None): + def tags_older_than(self, tag, delta, must_have_tag=None, + must_have_authors=None): ''' Return the ids of all books having the tag ``tag`` that are older than than the specified time. tag comparison is case insensitive. @@ -1927,6 +1932,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): the tag are returned. :param must_have_tag: If not None the list of matches will be restricted to books that have this tag + :param must_have_authors: A list of authors. If not None the list of + matches will be restricted to books that have these authors (case + insensitive). 
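A compact sketch, reusing only names from the hunk above, of how the new must_have_authors restriction compares against the stored authors field (author names are stored comma-separated, with commas inside a name escaped as '|'); the helper itself is hypothetical:

def authors_match(must_have_authors, stored_authors_field):
    mah = ','.join(x.replace(',', '|').lower() for x in must_have_authors)
    return (stored_authors_field or '').lower() == mah

assert authors_match(['calibre'], 'calibre')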
''' tag = tag.lower().strip() mht = must_have_tag.lower().strip() if must_have_tag else None @@ -1934,9 +1942,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): tindex = self.FIELD_MAP['timestamp'] gindex = self.FIELD_MAP['tags'] iindex = self.FIELD_MAP['id'] + aindex = self.FIELD_MAP['authors'] + mah = must_have_authors + if mah is not None: + mah = [x.replace(',', '|').lower() for x in mah] + mah = ','.join(mah) for r in self.data._data: if r is not None: if delta is None or (now - r[tindex]) > delta: + if mah: + authors = r[aindex] or '' + if authors.lower() != mah: + continue tags = r[gindex] if tags: tags = [x.strip() for x in tags.lower().split(',')] @@ -2063,6 +2080,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if path_changed: self.set_path(id, index_is_id=True) + if should_replace_field('title_sort'): + self.set_title_sort(id, mi.title_sort, notify=False, commit=False) if should_replace_field('author_sort'): doit(self.set_author_sort, id, mi.author_sort, notify=False, commit=False) @@ -3119,6 +3138,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): stream.seek(0) mi = get_metadata(stream, format, use_libprs_metadata=False, force_read_metadata=True) + # Force the author to calibre as the auto delete of old news checks for + # both the author==calibre and the tag News + mi.authors = ['calibre'] stream.seek(0) if mi.series_index is None: mi.series_index = self.get_next_series_num_for(mi.series) @@ -3170,6 +3192,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def create_book_entry(self, mi, cover=None, add_duplicates=True, force_id=None): + if mi.tags: + mi.tags = list(mi.tags) self._add_newbook_tag(mi) if not add_duplicates and self.has_book(mi): return None diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index d065ddc926..b5cf2e5ef3 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -26,7 +26,7 @@ from calibre.library.server.cache import Cache from calibre.library.server.browse import BrowseServer from calibre.library.server.ajax import AjaxServer from calibre.utils.search_query_parser import saved_searches -from calibre import prints +from calibre import prints, as_unicode class DispatchController(object): # {{{ @@ -112,6 +112,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, self.opts = opts self.embedded = embedded self.state_callback = None + self.start_failure_callback = None try: self.max_cover_width, self.max_cover_height = \ map(int, self.opts.max_cover.split('x')) @@ -136,6 +137,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, 'engine.autoreload_on' : getattr(opts, 'auto_reload', False), 'tools.log_headers.on' : opts.develop, + 'tools.encode.encoding' : 'UTF-8', 'checker.on' : opts.develop, 'request.show_tracebacks': show_tracebacks, 'server.socket_host' : listen_on, @@ -168,7 +170,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, self.config['/'] = { 'tools.digest_auth.on' : True, 'tools.digest_auth.realm' : ( - 'Password to access your calibre library. Username is ' + 'Your calibre library. 
Username: ' + opts.username.strip()), 'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()}, } @@ -224,41 +226,57 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, h.setFormatter(cherrypy._cplogging.logfmt) log.access_log.addHandler(h) + def start_cherrypy(self): + try: + cherrypy.engine.start() + except: + ip = get_external_ip() + if not ip or ip.startswith('127.'): + raise + cherrypy.log('Trying to bind to single interface: '+ip) + # Change the host we listen on + cherrypy.config.update({'server.socket_host' : ip}) + # This ensures that the change is actually applied + cherrypy.server.socket_host = ip + cherrypy.server.httpserver = cherrypy.server.instance = None + + cherrypy.engine.start() + def start(self): self.is_running = False + self.exception = None cherrypy.tree.mount(root=None, config=self.config) try: - try: - cherrypy.engine.start() - except: - ip = get_external_ip() - if not ip or ip.startswith('127.'): - raise - cherrypy.log('Trying to bind to single interface: '+ip) - # Change the host we listen on - cherrypy.config.update({'server.socket_host' : ip}) - # This ensures that the change is actually applied - cherrypy.server.socket_host = ip - cherrypy.server.httpserver = cherrypy.server.instance = None - - cherrypy.engine.start() - - self.is_running = True - #if hasattr(cherrypy.engine, 'signal_handler'): - # cherrypy.engine.signal_handler.subscribe() - - cherrypy.engine.block() + self.start_cherrypy() except Exception as e: self.exception = e import traceback traceback.print_exc() + if callable(self.start_failure_callback): + try: + self.start_failure_callback(as_unicode(e)) + except: + pass + return + + try: + self.is_running = True + self.notify_listener() + cherrypy.engine.block() + except Exception as e: + import traceback + traceback.print_exc() + self.exception = e finally: self.is_running = False - try: - if callable(self.state_callback): - self.state_callback(self.is_running) - except: - pass + self.notify_listener() + + def notify_listener(self): + try: + if callable(self.state_callback): + self.state_callback(self.is_running) + except: + pass def exit(self): try: @@ -266,11 +284,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, finally: cherrypy.server.httpserver = None self.is_running = False - try: - if callable(self.state_callback): - self.state_callback(self.is_running) - except: - pass + self.notify_listener() def threaded_exit(self): from threading import Thread diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index 5b7d732820..6de6fad726 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -669,10 +669,15 @@ class BrowseServer(object): args = {'id':id_, 'mi':mi, } ccache = self.categories_cache() if add_category_links else {} + ftitle = fauthors = '' for key in mi.all_field_keys(): val = mi.format_field(key)[1] if not val: val = '' + if key == 'title': + ftitle = xml(val, True) + elif key == 'authors': + fauthors = xml(val, True) if add_category_links: added_key = False fm = mi.metadata_for_field(key) @@ -710,8 +715,8 @@ class BrowseServer(object): args[key] = xml(val, True) else: args[key] = xml(val, True) - fname = quote(ascii_filename(args['title']) + ' - ' + - ascii_filename(args['authors'])) + fname = quote(ascii_filename(ftitle) + ' - ' + + ascii_filename(fauthors)) return args, fmt, fmts, fname @Endpoint(mimetype='application/json; charset=utf-8') diff --git 
a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index a6b5c389de..eef131e89f 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -117,6 +117,8 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS, data = TD() for fmt in book['formats'].split(','): + if not fmt or fmt.lower().startswith('original_'): + continue a = quote(ascii_filename(book['authors'])) t = quote(ascii_filename(book['title'])) s = SPAN( diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index 7ced74c70d..c37c1eafdb 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -323,7 +323,7 @@ remove all non-breaking-space entities, or may include false positive matches re tags, i.e. horizontal rules, and tags are exceptions. Horizontal rules can optionally be specified with styles, if you choose to add your own style be sure to include the 'width' setting, otherwise the style information will be discarded. Image tags can used, but |app| does not provide the ability to add the image during conversion, this must be done after the fact using - the 'Tweak Epub' feature, or Sigil. + the 'Tweak Book' feature, or Sigil. Example image tag (place the image within an 'Images' folder inside the epub after conversion): diff --git a/src/calibre/manual/develop.rst b/src/calibre/manual/develop.rst index 3524f9b3c3..8257eac3b0 100755 --- a/src/calibre/manual/develop.rst +++ b/src/calibre/manual/develop.rst @@ -65,7 +65,8 @@ to the latest code, use the command:: bzr merge -You can also download the source code as a tarball (archive) from `here `_. +The calibre repository is huge so the branch operation above takes along time (about an hour). If you want to get the code faster, the sourcecode for the latest release is always available as an +`archive `_. Submitting your changes to be included ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 9516be8931..224a4b2d1a 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -269,7 +269,7 @@ How do I use |app| with my Android phone/tablet? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are two ways that you can connect your Android device to calibre. Using a USB cable-- or wirelessly, over the air. -**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**. +**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**. Some devices may have a setting to put them in "disk mode" or "USB Transfer mode" that is needed before they act as USB disks. Using a USB cable ^^^^^^^^^^^^^^^^^^^^ diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 34dad57d93..f048d99d1b 100755 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -362,8 +362,9 @@ The syntax for searching for dates is:: If the date is ambiguous, the current locale is used for date comparison. For example, in an mm/dd/yyyy locale 2/1/2009 is interpreted as 1 Feb 2009. In a dd/mm/yyyy locale it is interpreted as 2 Jan 2009. Some special date strings are available. The string ``today`` translates to today's date, whatever it is. The -strings ``yesterday`` and ``thismonth`` also work. In addition, the string ``daysago`` can be used to compare -to a date some number of days ago. 
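A hedged illustration (not calibre's actual query parser) of what a ``daysago`` comparison reduces to, namely a cutoff computed from today's date:

from datetime import date, timedelta

def daysago_cutoff(n):
    # date:>10daysago keeps books whose date falls after this cutoff
    return date.today() - timedelta(days=int(n))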
For example:: +strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work. +In addition, the string ``daysago`` (also translated) can be used to compare to a date some number of days ago. +For example:: date:>10daysago date:<=45daysago @@ -399,7 +400,7 @@ You can search for the absence or presence of a field using the special "true" a Yes/no custom columns are searchable. Searching for ``false``, ``empty``, or ``blank`` will find all books with undefined values in the column. Searching for ``true`` will find all books that do not have undefined values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column. -Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column. +Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column. Note that the words ``yes``, ``no``, ``blank``, ``empty``, ``checked`` and ``unchecked`` are translated; you must use the current language's equivalent word. The words ``true`` and ``false`` and the special values ``_yes`` and ``_no`` are not translated. Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.B.C`, but not the tag `A`. diff --git a/src/calibre/manual/images/bookmark.png b/src/calibre/manual/images/bookmark.png index c6671a2541..4ba83fb49c 100644 Binary files a/src/calibre/manual/images/bookmark.png and b/src/calibre/manual/images/bookmark.png differ diff --git a/src/calibre/manual/images/pref_button.png b/src/calibre/manual/images/pref_button.png index f43f2d7627..52d9bae6e0 100644 Binary files a/src/calibre/manual/images/pref_button.png and b/src/calibre/manual/images/pref_button.png differ diff --git a/src/calibre/manual/images/ref_mode_button.png b/src/calibre/manual/images/ref_mode_button.png index efed1af26b..3ec70e91ab 100644 Binary files a/src/calibre/manual/images/ref_mode_button.png and b/src/calibre/manual/images/ref_mode_button.png differ diff --git a/src/calibre/manual/templates/layout.html b/src/calibre/manual/templates/layout.html index 8f35a9a6c5..88e1605f92 100644 --- a/src/calibre/manual/templates/layout.html +++ b/src/calibre/manual/templates/layout.html @@ -16,17 +16,17 @@
{% if not embedded %}
(two removed and two added markup lines here; their HTML/script tags were lost when this diff was extracted)
{% endif %} {% block body %} {% endblock %} @@ -58,13 +58,14 @@ -
(several removed and added markup lines here; their HTML content was lost when this diff was extracted)
{% endblock %} diff --git a/src/calibre/manual/templates/search.html b/src/calibre/manual/templates/search.html index 58901c8fb4..014ee11a65 100644 --- a/src/calibre/manual/templates/search.html +++ b/src/calibre/manual/templates/search.html @@ -33,7 +33,7 @@ ', i) - if j == -1: - break - orig_body = orig_body[:i] + orig_body[j+9:] - - page_file = os.path.join(temp_dir, 'page.html') - open(page_file, 'wb').write(orig_body) - - err_file = os.path.join(temp_dir, 'nsgmls.err') - command = ('%s -c%s -f%s -s -E10 %s' % - (nsgmls_path, catalog_path, err_file, page_file)) - command = command.replace('\\', '/') - os.system(command) - errs = open(err_file, 'rb').read() - - new_errs = [] - for err in errs.splitlines(): - ignore = False - for err_ign in errors_to_ignore or []: - if err.find(err_ign) != -1: - ignore = True - break - if not ignore: - new_errs.append(err) - - if new_errs: - response.body = wrong_content('
'.join(new_errs), orig_body) - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - diff --git a/src/cherrypy/lib/wsgiapp.py b/src/cherrypy/lib/wsgiapp.py deleted file mode 100644 index 8aeb5755d9..0000000000 --- a/src/cherrypy/lib/wsgiapp.py +++ /dev/null @@ -1,77 +0,0 @@ -"""A CherryPy tool for hosting a foreign WSGI application.""" - -import sys -import warnings - -import cherrypy - - -# is this sufficient for start_response? -def start_response(status, response_headers, exc_info=None): - cherrypy.response.status = status - headers_dict = dict(response_headers) - cherrypy.response.headers.update(headers_dict) - -def make_environ(): - """grabbed some of below from wsgiserver.py - - for hosting WSGI apps in non-WSGI environments (yikes!) - """ - - request = cherrypy.request - - # create and populate the wsgi environ - environ = dict() - environ["wsgi.version"] = (1,0) - environ["wsgi.url_scheme"] = request.scheme - environ["wsgi.input"] = request.rfile - environ["wsgi.errors"] = sys.stderr - environ["wsgi.multithread"] = True - environ["wsgi.multiprocess"] = False - environ["wsgi.run_once"] = False - environ["REQUEST_METHOD"] = request.method - environ["SCRIPT_NAME"] = request.script_name - environ["PATH_INFO"] = request.path_info - environ["QUERY_STRING"] = request.query_string - environ["SERVER_PROTOCOL"] = request.protocol - environ["SERVER_NAME"] = request.local.name - environ["SERVER_PORT"] = request.local.port - environ["REMOTE_HOST"] = request.remote.name - environ["REMOTE_ADDR"] = request.remote.ip - environ["REMOTE_PORT"] = request.remote.port - # then all the http headers - headers = request.headers - environ["CONTENT_TYPE"] = headers.get("Content-type", "") - environ["CONTENT_LENGTH"] = headers.get("Content-length", "") - for (k, v) in headers.iteritems(): - envname = "HTTP_" + k.upper().replace("-","_") - environ[envname] = v - return environ - - -def run(app, env=None): - """Run the given WSGI app and set response.body to its output.""" - warnings.warn("This module is deprecated and will be removed in " - "Cherrypy 3.2. 
See http://www.cherrypy.org/ticket/700 " - "for more information.") - - try: - environ = cherrypy.request.wsgi_environ.copy() - environ['SCRIPT_NAME'] = cherrypy.request.script_name - environ['PATH_INFO'] = cherrypy.request.path_info - except AttributeError: - environ = make_environ() - - if env: - environ.update(env) - - # run the wsgi app and have it set response.body - response = app(environ, start_response) - try: - cherrypy.response.body = [x for x in response] - finally: - if hasattr(response, "close"): - response.close() - - return True - diff --git a/src/cherrypy/lib/xmlrpc.py b/src/cherrypy/lib/xmlrpcutil.py similarity index 78% rename from src/cherrypy/lib/xmlrpc.py rename to src/cherrypy/lib/xmlrpcutil.py index 59ee0278fe..9a44464bc0 100644 --- a/src/cherrypy/lib/xmlrpc.py +++ b/src/cherrypy/lib/xmlrpcutil.py @@ -1,13 +1,19 @@ import sys import cherrypy +from cherrypy._cpcompat import ntob +def get_xmlrpclib(): + try: + import xmlrpc.client as x + except ImportError: + import xmlrpclib as x + return x def process_body(): """Return (params, method) from request body.""" try: - import xmlrpclib - return xmlrpclib.loads(cherrypy.request.body.read()) + return get_xmlrpclib().loads(cherrypy.request.body.read()) except Exception: return ('ERROR PARAMS', ), 'ERRORMETHOD' @@ -29,13 +35,13 @@ def _set_response(body): # as a "Protocol Error", we'll just return 200 every time. response = cherrypy.response response.status = '200 OK' - response.body = body + response.body = ntob(body, 'utf-8') response.headers['Content-Type'] = 'text/xml' response.headers['Content-Length'] = len(body) def respond(body, encoding='utf-8', allow_none=0): - import xmlrpclib + xmlrpclib = get_xmlrpclib() if not isinstance(body, xmlrpclib.Fault): body = (body,) _set_response(xmlrpclib.dumps(body, methodresponse=1, @@ -44,6 +50,6 @@ def respond(body, encoding='utf-8', allow_none=0): def on_error(*args, **kwargs): body = str(sys.exc_info()[1]) - import xmlrpclib + xmlrpclib = get_xmlrpclib() _set_response(xmlrpclib.dumps(xmlrpclib.Fault(1, body))) diff --git a/src/cherrypy/process/plugins.py b/src/cherrypy/process/plugins.py index 0e8b4bf919..ba618a0bd0 100644 --- a/src/cherrypy/process/plugins.py +++ b/src/cherrypy/process/plugins.py @@ -2,19 +2,38 @@ import os import re -try: - set -except NameError: - from sets import Set as set import signal as _signal import sys import time import threading +from cherrypy._cpcompat import basestring, get_daemon, get_thread_ident, ntob, set + +# _module__file__base is used by Autoreload to make +# absolute any filenames retrieved from sys.modules which are not +# already absolute paths. This is to work around Python's quirk +# of importing the startup script and using a relative filename +# for it in sys.modules. +# +# Autoreload examines sys.modules afresh every time it runs. If an application +# changes the current directory by executing os.chdir(), then the next time +# Autoreload runs, it will not be able to find any filenames which are +# not absolute paths, because the current directory is not the same as when the +# module was first imported. Autoreload will then wrongly conclude the file has +# "changed", and initiate the shutdown/re-exec sequence. +# See ticket #917. +# For this workaround to have a decent probability of success, this module +# needs to be imported as early as possible, before the app has much chance +# to change the working directory. 
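# [Editor's aside -- illustration only, not part of the patch.] The comment
# above describes anchoring relative module filenames to the directory saved
# at import time, so a later os.chdir() by the application cannot confuse the
# autoreloader. A minimal standalone sketch of that idea (the names
# _saved_base and absolutize are hypothetical):
import os
import sys

_saved_base = os.getcwd()   # captured as early as possible, like _module__file__base

def absolutize(module_name):
    # Return an absolute path for the module's __file__, immune to later os.chdir().
    f = getattr(sys.modules[module_name], '__file__', None)
    if f is not None and not os.path.isabs(f):
        f = os.path.normpath(os.path.join(_saved_base, f))
    return f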
+_module__file__base = os.getcwd() + class SimplePlugin(object): """Plugin base class which auto-subscribes methods for known channels.""" + bus = None + """A :class:`Bus `, usually cherrypy.engine.""" + def __init__(self, bus): self.bus = bus @@ -39,16 +58,33 @@ class SimplePlugin(object): class SignalHandler(object): """Register bus channels (and listeners) for system signals. - By default, instantiating this object subscribes the following signals - and listeners: + You can modify what signals your application listens for, and what it does + when it receives signals, by modifying :attr:`SignalHandler.handlers`, + a dict of {signal name: callback} pairs. The default set is:: - TERM: bus.exit - HUP : bus.restart - USR1: bus.graceful + handlers = {'SIGTERM': self.bus.exit, + 'SIGHUP': self.handle_SIGHUP, + 'SIGUSR1': self.bus.graceful, + } + + The :func:`SignalHandler.handle_SIGHUP`` method calls + :func:`bus.restart()` + if the process is daemonized, but + :func:`bus.exit()` + if the process is attached to a TTY. This is because Unix window + managers tend to send SIGHUP to terminal windows when the user closes them. + + Feel free to add signals which are not available on every platform. The + :class:`SignalHandler` will ignore errors raised from attempting to register + handlers for unknown signals. """ - # Map from signal numbers to names + handlers = {} + """A map from signal names (e.g. 'SIGTERM') to handlers (e.g. bus.exit).""" + signals = {} + """A map from signal numbers to names.""" + for k, v in vars(_signal).items(): if k.startswith('SIG') and not k.startswith('SIG_'): signals[v] = k @@ -61,18 +97,32 @@ class SignalHandler(object): 'SIGHUP': self.handle_SIGHUP, 'SIGUSR1': self.bus.graceful, } - + + if sys.platform[:4] == 'java': + del self.handlers['SIGUSR1'] + self.handlers['SIGUSR2'] = self.bus.graceful + self.bus.log("SIGUSR1 cannot be set on the JVM platform. " + "Using SIGUSR2 instead.") + self.handlers['SIGINT'] = self._jython_SIGINT_handler + self._previous_handlers = {} + def _jython_SIGINT_handler(self, signum=None, frame=None): + # See http://bugs.jython.org/issue1313 + self.bus.log('Keyboard Interrupt: shutting down bus') + self.bus.exit() + def subscribe(self): - for sig, func in self.handlers.iteritems(): + """Subscribe self.handlers to signals.""" + for sig, func in self.handlers.items(): try: self.set_handler(sig, func) except ValueError: pass def unsubscribe(self): - for signum, handler in self._previous_handlers.iteritems(): + """Unsubscribe self.handlers from signals.""" + for signum, handler in self._previous_handlers.items(): signame = self.signals[signum] if handler is None: @@ -126,6 +176,7 @@ class SignalHandler(object): self.bus.publish(signame) def handle_SIGHUP(self): + """Restart if daemonized, else exit.""" if os.isatty(sys.stdin.fileno()): # not daemonized (may be foreground or background) self.bus.log("SIGHUP caught but not daemonized. Exiting.") @@ -165,7 +216,8 @@ class DropPrivileges(SimplePlugin): elif isinstance(val, basestring): val = pwd.getpwnam(val)[2] self._uid = val - uid = property(_get_uid, _set_uid, doc="The uid under which to run.") + uid = property(_get_uid, _set_uid, + doc="The uid under which to run. Availability: Unix.") def _get_gid(self): return self._gid @@ -178,7 +230,8 @@ class DropPrivileges(SimplePlugin): elif isinstance(val, basestring): val = grp.getgrnam(val)[2] self._gid = val - gid = property(_get_gid, _set_gid, doc="The gid under which to run.") + gid = property(_get_gid, _set_gid, + doc="The gid under which to run. 
Availability: Unix.") def _get_umask(self): return self._umask @@ -191,7 +244,12 @@ class DropPrivileges(SimplePlugin): level=30) val = None self._umask = val - umask = property(_get_umask, _set_umask, doc="The umask under which to run.") + umask = property(_get_umask, _set_umask, + doc="""The default permission mode for newly created files and directories. + + Usually expressed in octal format, for example, ``0644``. + Availability: Unix, Windows. + """) def start(self): # uid/gid @@ -216,6 +274,7 @@ class DropPrivileges(SimplePlugin): self.bus.log('Started as uid: %r gid: %r' % current_ids()) if self.gid is not None: os.setgid(self.gid) + os.setgroups([]) if self.uid is not None: os.setuid(self.uid) self.bus.log('Running as uid: %r gid: %r' % current_ids()) @@ -242,8 +301,8 @@ class DropPrivileges(SimplePlugin): class Daemonizer(SimplePlugin): """Daemonize the running script. - Use this with a Web Site Process Bus via: - + Use this with a Web Site Process Bus via:: + Daemonizer(bus).subscribe() When this component finishes, the process is completely decoupled from @@ -296,8 +355,9 @@ class Daemonizer(SimplePlugin): # This is the first parent. Exit, now that we've forked. self.bus.log('Forking once.') os._exit(0) - except OSError, exc: + except OSError: # Python raises OSError rather than returning negative numbers. + exc = sys.exc_info()[1] sys.exit("%s: fork #1 failed: (%d) %s\n" % (sys.argv[0], exc.errno, exc.strerror)) @@ -309,7 +369,8 @@ class Daemonizer(SimplePlugin): if pid > 0: self.bus.log('Forking twice.') os._exit(0) # Exit second parent - except OSError, exc: + except OSError: + exc = sys.exc_info()[1] sys.exit("%s: fork #2 failed: (%d) %s\n" % (sys.argv[0], exc.errno, exc.strerror)) @@ -318,7 +379,7 @@ class Daemonizer(SimplePlugin): si = open(self.stdin, "r") so = open(self.stdout, "a+") - se = open(self.stderr, "a+", 0) + se = open(self.stderr, "a+") # os.dup2(fd, fd2) will close fd2 if necessary, # so we don't explicitly close stdin/out/err. @@ -345,7 +406,7 @@ class PIDFile(SimplePlugin): if self.finalized: self.bus.log('PID %r already written to %r.' % (pid, self.pidfile)) else: - open(self.pidfile, "wb").write(str(pid)) + open(self.pidfile, "wb").write(ntob("%s" % pid, 'utf8')) self.bus.log('PID %r written to %r.' % (pid, self.pidfile)) self.finalized = True start.priority = 70 @@ -361,38 +422,94 @@ class PIDFile(SimplePlugin): class PerpetualTimer(threading._Timer): - """A subclass of threading._Timer whose run() method repeats.""" + """A responsive subclass of threading._Timer whose run() method repeats. + + Use this timer only when you really need a very interruptible timer; + this checks its 'finished' condition up to 20 times a second, which can + results in pretty high CPU usage + """ def run(self): while True: self.finished.wait(self.interval) if self.finished.isSet(): return - self.function(*self.args, **self.kwargs) + try: + self.function(*self.args, **self.kwargs) + except Exception: + self.bus.log("Error in perpetual timer thread function %r." % + self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + +class BackgroundTask(threading.Thread): + """A subclass of threading.Thread whose run() method repeats. + + Use this class for most repeating tasks. It uses time.sleep() to wait + for each interval, which isn't very responsive; that is, even if you call + self.cancel(), you'll have to wait until the sleep() call finishes before + the thread stops. 
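# [Editor's aside -- usage sketch, not part of the patch; assumes the bundled
# CherryPy with the BackgroundTask class introduced here is importable.]
import time
from cherrypy.process.plugins import BackgroundTask

def tick():
    print('poll')               # hypothetical periodic work

task = BackgroundTask(2, tick)  # run tick() roughly every 2 seconds
task.start()
time.sleep(5)
task.cancel()                   # honoured only once the current sleep ends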
To compensate, it defaults to being daemonic, which means + it won't delay stopping the whole process. + """ + + def __init__(self, interval, function, args=[], kwargs={}, bus=None): + threading.Thread.__init__(self) + self.interval = interval + self.function = function + self.args = args + self.kwargs = kwargs + self.running = False + self.bus = bus + + def cancel(self): + self.running = False + + def run(self): + self.running = True + while self.running: + time.sleep(self.interval) + if not self.running: + return + try: + self.function(*self.args, **self.kwargs) + except Exception: + if self.bus: + self.bus.log("Error in background task thread function %r." + % self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + def _set_daemon(self): + return True class Monitor(SimplePlugin): - """WSPBus listener to periodically run a callback in its own thread. + """WSPBus listener to periodically run a callback in its own thread.""" - bus: a Web Site Process Bus object. - callback: the function to call at intervals. - frequency: the time in seconds between callback runs. - """ + callback = None + """The function to call at intervals.""" frequency = 60 + """The time in seconds between callback runs.""" - def __init__(self, bus, callback, frequency=60): + thread = None + """A :class:`BackgroundTask` thread.""" + + def __init__(self, bus, callback, frequency=60, name=None): SimplePlugin.__init__(self, bus) self.callback = callback self.frequency = frequency self.thread = None + self.name = name def start(self): - """Start our callback in its own perpetual timer thread.""" + """Start our callback in its own background thread.""" if self.frequency > 0: - threadname = self.__class__.__name__ + threadname = self.name or self.__class__.__name__ if self.thread is None: - self.thread = PerpetualTimer(self.frequency, self.callback) + self.thread = BackgroundTask(self.frequency, self.callback, + bus = self.bus) self.thread.setName(threadname) self.thread.start() self.bus.log("Started monitor thread %r." % threadname) @@ -401,28 +518,54 @@ class Monitor(SimplePlugin): start.priority = 70 def stop(self): - """Stop our callback's perpetual timer thread.""" + """Stop our callback's background task thread.""" if self.thread is None: - self.bus.log("No thread running for %s." % self.__class__.__name__) + self.bus.log("No thread running for %s." % self.name or self.__class__.__name__) else: if self.thread is not threading.currentThread(): name = self.thread.getName() self.thread.cancel() - self.thread.join() + if not get_daemon(self.thread): + self.bus.log("Joining %r" % name) + self.thread.join() self.bus.log("Stopped thread %r." % name) self.thread = None def graceful(self): - """Stop the callback's perpetual timer thread and restart it.""" + """Stop the callback's background task thread and restart it.""" self.stop() self.start() class Autoreloader(Monitor): - """Monitor which re-executes the process when files change.""" + """Monitor which re-executes the process when files change. + + This :ref:`plugin` restarts the process (via :func:`os.execv`) + if any of the files it monitors change (or is deleted). By default, the + autoreloader monitors all imported modules; you can add to the + set by adding to ``autoreload.files``:: + + cherrypy.engine.autoreload.files.add(myFile) + + If there are imported files you do *not* wish to monitor, you can adjust the + ``match`` attribute, a regular expression. 
For example, to stop monitoring + cherrypy itself:: + + cherrypy.engine.autoreload.match = r'^(?!cherrypy).+' + + Like all :class:`Monitor` plugins, + the autoreload plugin takes a ``frequency`` argument. The default is + 1 second; that is, the autoreloader will examine files once each second. + """ + + files = None + """The set of files to poll for modifications.""" frequency = 1 + """The interval in seconds at which to poll for modified files.""" + match = '.*' + """A regular expression by which to match filenames.""" def __init__(self, bus, frequency=1, match='.*'): self.mtimes = {} @@ -431,24 +574,30 @@ class Autoreloader(Monitor): Monitor.__init__(self, bus, self.run, frequency) def start(self): - """Start our own perpetual timer thread for self.run.""" + """Start our own background task thread for self.run.""" if self.thread is None: self.mtimes = {} Monitor.start(self) start.priority = 70 - def run(self): - """Reload the process if registered files have been modified.""" - sysfiles = set() + def sysfiles(self): + """Return a Set of sys.modules filenames to monitor.""" + files = set() for k, m in sys.modules.items(): if re.match(self.match, k): - if hasattr(m, '__loader__'): - if hasattr(m.__loader__, 'archive'): - k = m.__loader__.archive - k = getattr(m, '__file__', None) - sysfiles.add(k) - - for filename in sysfiles | self.files: + if hasattr(m, '__loader__') and hasattr(m.__loader__, 'archive'): + f = m.__loader__.archive + else: + f = getattr(m, '__file__', None) + if f is not None and not os.path.isabs(f): + # ensure absolute paths so a os.chdir() in the app doesn't break me + f = os.path.normpath(os.path.join(_module__file__base, f)) + files.add(f) + return files + + def run(self): + """Reload the process if registered files have been modified.""" + for filename in self.sysfiles() | self.files: if filename: if filename.endswith('.pyc'): filename = filename[:-1] @@ -493,21 +642,26 @@ class ThreadManager(SimplePlugin): 'stop_thread' listeners for you when it stops. """ + threads = None + """A map of {thread ident: index number} pairs.""" + def __init__(self, bus): self.threads = {} SimplePlugin.__init__(self, bus) self.bus.listeners.setdefault('acquire_thread', set()) + self.bus.listeners.setdefault('start_thread', set()) self.bus.listeners.setdefault('release_thread', set()) - + self.bus.listeners.setdefault('stop_thread', set()) + def acquire_thread(self): """Run 'start_thread' listeners for the current thread. If the current thread has already been seen, any 'start_thread' listeners will not be run again. """ - thread_ident = threading._get_ident() + thread_ident = get_thread_ident() if thread_ident not in self.threads: - # We can't just use _get_ident as the thread ID + # We can't just use get_ident as the thread ID # because some platforms reuse thread ID's. 
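# [Editor's aside -- illustration only.] get_thread_ident above comes from
# cherrypy._cpcompat; a shim along these lines is what such a helper usually
# looks like, so the same call works on both Python 2 and Python 3:
try:
    from thread import get_ident as get_thread_ident        # Python 2
except ImportError:
    from threading import get_ident as get_thread_ident     # Python 3

print(get_thread_ident())   # integer identifying the current thread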
i = len(self.threads) + 1 self.threads[thread_ident] = i @@ -515,14 +669,14 @@ class ThreadManager(SimplePlugin): def release_thread(self): """Release the current thread and run 'stop_thread' listeners.""" - thread_ident = threading._get_ident() + thread_ident = get_thread_ident() i = self.threads.pop(thread_ident, None) if i is not None: self.bus.publish('stop_thread', i) def stop(self): """Release all threads and run all 'stop_thread' listeners.""" - for thread_ident, i in self.threads.iteritems(): + for thread_ident, i in self.threads.items(): self.bus.publish('stop_thread', i) self.threads.clear() graceful = stop diff --git a/src/cherrypy/process/servers.py b/src/cherrypy/process/servers.py index da469bfad2..456da1e5f5 100644 --- a/src/cherrypy/process/servers.py +++ b/src/cherrypy/process/servers.py @@ -1,5 +1,117 @@ -"""Adapt an HTTP server.""" +""" +Starting in CherryPy 3.1, cherrypy.server is implemented as an +:ref:`Engine Plugin`. It's an instance of +:class:`cherrypy._cpserver.Server`, which is a subclass of +:class:`cherrypy.process.servers.ServerAdapter`. The ``ServerAdapter`` class +is designed to control other servers, as well. +Multiple servers/ports +====================== + +If you need to start more than one HTTP server (to serve on multiple ports, or +protocols, etc.), you can manually register each one and then start them all +with engine.start:: + + s1 = ServerAdapter(cherrypy.engine, MyWSGIServer(host='0.0.0.0', port=80)) + s2 = ServerAdapter(cherrypy.engine, another.HTTPServer(host='127.0.0.1', SSL=True)) + s1.subscribe() + s2.subscribe() + cherrypy.engine.start() + +.. index:: SCGI + +FastCGI/SCGI +============ + +There are also Flup\ **F**\ CGIServer and Flup\ **S**\ CGIServer classes in +:mod:`cherrypy.process.servers`. To start an fcgi server, for example, +wrap an instance of it in a ServerAdapter:: + + addr = ('0.0.0.0', 4000) + f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=addr) + s = servers.ServerAdapter(cherrypy.engine, httpserver=f, bind_addr=addr) + s.subscribe() + +The :doc:`cherryd` startup script will do the above for +you via its `-f` flag. +Note that you need to download and install `flup `_ +yourself, whether you use ``cherryd`` or not. + +.. _fastcgi: +.. index:: FastCGI + +FastCGI +------- + +A very simple setup lets your cherry run with FastCGI. +You just need the flup library, +plus a running Apache server (with ``mod_fastcgi``) or lighttpd server. + +CherryPy code +^^^^^^^^^^^^^ + +hello.py:: + + #!/usr/bin/python + import cherrypy + + class HelloWorld: + \"""Sample request handler class.\""" + def index(self): + return "Hello world!" + index.exposed = True + + cherrypy.tree.mount(HelloWorld()) + # CherryPy autoreload must be disabled for the flup server to work + cherrypy.config.update({'engine.autoreload_on':False}) + +Then run :doc:`/deployguide/cherryd` with the '-f' arg:: + + cherryd -c -d -f -i hello.py + +Apache +^^^^^^ + +At the top level in httpd.conf:: + + FastCgiIpcDir /tmp + FastCgiServer /path/to/cherry.fcgi -idle-timeout 120 -processes 4 + +And inside the relevant VirtualHost section:: + + # FastCGI config + AddHandler fastcgi-script .fcgi + ScriptAliasMatch (.*$) /path/to/cherry.fcgi$1 + +Lighttpd +^^^^^^^^ + +For `Lighttpd `_ you can follow these +instructions. Within ``lighttpd.conf`` make sure ``mod_fastcgi`` is +active within ``server.modules``. 
Then, within your ``$HTTP["host"]`` +directive, configure your fastcgi script like the following:: + + $HTTP["url"] =~ "" { + fastcgi.server = ( + "/" => ( + "script.fcgi" => ( + "bin-path" => "/path/to/your/script.fcgi", + "socket" => "/tmp/script.sock", + "check-local" => "disable", + "disable-time" => 1, + "min-procs" => 1, + "max-procs" => 1, # adjust as needed + ), + ), + ) + } # end of $HTTP["url"] =~ "^/" + +Please see `Lighttpd FastCGI Docs +`_ for an explanation +of the possible configuration options. +""" + +import sys import time @@ -34,7 +146,9 @@ class ServerAdapter(object): def start(self): """Start the HTTP server.""" - if isinstance(self.bind_addr, tuple): + if self.bind_addr is None: + on_what = "unknown interface (dynamic?)" + elif isinstance(self.bind_addr, tuple): host, port = self.bind_addr on_what = "%s:%s" % (host, port) else: @@ -71,17 +185,16 @@ class ServerAdapter(object): """ try: self.httpserver.start() - except KeyboardInterrupt, exc: + except KeyboardInterrupt: self.bus.log(" hit: shutting down HTTP server") - self.interrupt = exc + self.interrupt = sys.exc_info()[1] self.bus.exit() - except SystemExit, exc: + except SystemExit: self.bus.log("SystemExit raised: shutting down HTTP server") - self.interrupt = exc + self.interrupt = sys.exc_info()[1] self.bus.exit() raise except: - import sys self.interrupt = sys.exc_info()[1] self.bus.log("Error in HTTP server: shutting down", traceback=True, level=40) @@ -120,10 +233,40 @@ class ServerAdapter(object): self.start() +class FlupCGIServer(object): + """Adapter for a flup.server.cgi.WSGIServer.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + self.ready = False + + def start(self): + """Start the CGI server.""" + # We have to instantiate the server class here because its __init__ + # starts a threadpool. If we do it too early, daemonize won't work. + from flup.server.cgi import WSGIServer + + self.cgiserver = WSGIServer(*self.args, **self.kwargs) + self.ready = True + self.cgiserver.run() + + def stop(self): + """Stop the HTTP server.""" + self.ready = False + + class FlupFCGIServer(object): """Adapter for a flup.server.fcgi.WSGIServer.""" def __init__(self, *args, **kwargs): + if kwargs.get('bindAddress', None) is None: + import socket + if not hasattr(socket, 'fromfd'): + raise ValueError( + 'Dynamic FCGI server not available on this platform. ' + 'You must use a static or external one by providing a ' + 'legal bindAddress.') self.args = args self.kwargs = kwargs self.ready = False @@ -199,8 +342,9 @@ def client_host(server_host): if server_host == '0.0.0.0': # 0.0.0.0 is INADDR_ANY, which should answer on localhost. return '127.0.0.1' - if server_host == '::': + if server_host in ('::', '::0', '::0.0.0.0'): # :: is IN6ADDR_ANY, which should answer on localhost. + # ::0 and ::0.0.0.0 are non-canonical but common ways to write IN6ADDR_ANY. 
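# [Editor's aside -- sketch of the behaviour patched here, for clarity; not
# additional patch content.] All the wildcard addresses map to a loopback
# address that a client can actually connect to:
def client_host_sketch(server_host):
    if server_host == '0.0.0.0':                      # INADDR_ANY
        return '127.0.0.1'
    if server_host in ('::', '::0', '::0.0.0.0'):     # IN6ADDR_ANY spellings
        return '::1'
    return server_host

assert client_host_sketch('::0') == '::1'
assert client_host_sketch('192.168.1.5') == '192.168.1.5'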
return '::1' return server_host @@ -215,8 +359,16 @@ def check_port(host, port, timeout=1.0): # AF_INET or AF_INET6 socket # Get the correct address family for our host (allows IPv6 addresses) - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM) + except socket.gaierror: + if ':' in host: + info = [(socket.AF_INET6, socket.SOCK_STREAM, 0, "", (host, port, 0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", (host, port))] + + for res in info: af, socktype, proto, canonname, sa = res s = None try: @@ -233,34 +385,43 @@ def check_port(host, port, timeout=1.0): if s: s.close() -def wait_for_free_port(host, port): + +# Feel free to increase these defaults on slow systems: +free_port_timeout = 0.2 # Changed by Kovid +occupied_port_timeout = 1.0 + +def wait_for_free_port(host, port, timeout=None): """Wait for the specified port to become free (drop requests).""" if not host: raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = free_port_timeout - for trial in xrange(50): + for trial in range(50): try: # we are expecting a free port, so reduce the timeout - check_port(host, port, timeout=0.2) # Changed by Kovid + check_port(host, port, timeout=timeout) except IOError: # Give the old server thread time to free the port. - time.sleep(0.2) # Changed by Kovid + time.sleep(timeout) else: return raise IOError("Port %r not free on %r" % (port, host)) -def wait_for_occupied_port(host, port): +def wait_for_occupied_port(host, port, timeout=None): """Wait for the specified port to become active (receive requests).""" if not host: raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = occupied_port_timeout - for trial in xrange(50): + for trial in range(50): try: - check_port(host, port) + check_port(host, port, timeout=timeout) except IOError: return else: - time.sleep(.1) + time.sleep(timeout) raise IOError("Port %r not bound on %r" % (port, host)) diff --git a/src/cherrypy/process/win32.py b/src/cherrypy/process/win32.py index 0ca43d5e9b..83f99a5d46 100644 --- a/src/cherrypy/process/win32.py +++ b/src/cherrypy/process/win32.py @@ -1,7 +1,6 @@ """Windows service. Requires pywin32.""" import os -import thread import win32api import win32con import win32event @@ -84,7 +83,7 @@ class Win32Bus(wspbus.Bus): return self.events[state] except KeyError: event = win32event.CreateEvent(None, 0, 0, - u"WSPBus %s Event (pid=%r)" % + "WSPBus %s Event (pid=%r)" % (state.name, os.getpid())) self.events[state] = event return event @@ -97,7 +96,7 @@ class Win32Bus(wspbus.Bus): win32event.PulseEvent(event) state = property(_get_state, _set_state) - def wait(self, state, interval=0.1): + def wait(self, state, interval=0.1, channel=None): """Wait for the given state(s), KeyboardInterrupt or SystemExit. Since this class uses native win32event objects, the interval @@ -128,7 +127,7 @@ class _ControlCodes(dict): def key_for(self, obj): """For the given value, return its corresponding key.""" - for key, val in self.iteritems(): + for key, val in self.items(): if val is obj: return key raise ValueError("The given object could not be found: %r" % obj) diff --git a/src/cherrypy/process/wspbus.py b/src/cherrypy/process/wspbus.py index 5bbcb8c629..0eacf03d20 100644 --- a/src/cherrypy/process/wspbus.py +++ b/src/cherrypy/process/wspbus.py @@ -20,24 +20,24 @@ autoreload component. 
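# [Editor's aside -- usage sketch, not part of the patch; assumes the Bus
# class from this module (cherrypy.process.wspbus) is importable.] The
# topic-based publish/subscribe flow this docstring describes:
from cherrypy.process.wspbus import Bus

def on_start():
    print('start listeners ran')

bus = Bus()
bus.subscribe('start', on_start)   # register a listener on the 'start' channel
bus.publish('start')               # run every 'start' listener, collect their return values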
Ideally, a Bus object will be flexible enough to be useful in a variety of invocation scenarios: - 1. The deployer starts a site from the command line via a framework- - neutral deployment script; applications from multiple frameworks - are mixed in a single site. Command-line arguments and configuration - files are used to define site-wide components such as the HTTP server, - WSGI component graph, autoreload behavior, signal handling, etc. + 1. The deployer starts a site from the command line via a + framework-neutral deployment script; applications from multiple frameworks + are mixed in a single site. Command-line arguments and configuration + files are used to define site-wide components such as the HTTP server, + WSGI component graph, autoreload behavior, signal handling, etc. 2. The deployer starts a site via some other process, such as Apache; - applications from multiple frameworks are mixed in a single site. - Autoreload and signal handling (from Python at least) are disabled. + applications from multiple frameworks are mixed in a single site. + Autoreload and signal handling (from Python at least) are disabled. 3. The deployer starts a site via a framework-specific mechanism; - for example, when running tests, exploring tutorials, or deploying - single applications from a single framework. The framework controls - which site-wide components are enabled as it sees fit. + for example, when running tests, exploring tutorials, or deploying + single applications from a single framework. The framework controls + which site-wide components are enabled as it sees fit. The Bus object in this package uses topic-based publish-subscribe messaging to accomplish all this. A few topic channels are built in -('start', 'stop', 'exit', and 'graceful'). Frameworks and site containers -are free to define their own. If a message is sent to a channel that has -not been defined or has no listeners, there is no effect. +('start', 'stop', 'exit', 'graceful', 'log', and 'main'). Frameworks and +site containers are free to define their own. If a message is sent to a +channel that has not been defined or has no listeners, there is no effect. In general, there should only ever be a single Bus object per process. Frameworks and site containers share a single Bus object by publishing @@ -46,7 +46,7 @@ messages and subscribing listeners. The Bus object works as a finite state machine which models the current state of the process. Bus methods move it from one state to another; those methods then publish to subscribed listeners on the channel for -the new state. +the new state.:: O | @@ -62,16 +62,49 @@ the new state. import atexit import os -try: - set -except NameError: - from sets import Set as set import sys import threading import time import traceback as _traceback import warnings +from cherrypy._cpcompat import set + +# Here I save the value of os.getcwd(), which, if I am imported early enough, +# will be the directory from which the startup script was run. This is needed +# by _do_execv(), to change back to the original directory before execv()ing a +# new process. This is a defense against the application having changed the +# current working directory (which could make sys.executable "not found" if +# sys.executable is a relative-path, and/or cause other problems). 
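# [Editor's aside -- illustration only of the pattern this comment describes;
# the real logic lives in Bus._do_execv() further down in this patch.]
import os
import sys

_saved_cwd = os.getcwd()            # remembered at import time

def reexec_sketch():
    args = [sys.executable] + sys.argv
    os.chdir(_saved_cwd)            # undo any os.chdir() done by the application
    os.execv(sys.executable, args)  # replace the current process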
+_startup_cwd = os.getcwd() + +class ChannelFailures(Exception): + """Exception raised when errors occur in a listener during Bus.publish().""" + delimiter = '\n' + + def __init__(self, *args, **kwargs): + # Don't use 'super' here; Exceptions are old-style in Py2.4 + # See http://www.cherrypy.org/ticket/959 + Exception.__init__(self, *args, **kwargs) + self._exceptions = list() + + def handle_exception(self): + """Append the current exception to self.""" + self._exceptions.append(sys.exc_info()[1]) + + def get_instances(self): + """Return a list of seen exception instances.""" + return self._exceptions[:] + + def __str__(self): + exception_strings = map(repr, self.get_instances()) + return self.delimiter.join(exception_strings) + + __repr__ = __str__ + + def __bool__(self): + return bool(self._exceptions) + __nonzero__ = __bool__ # Use a flag to indicate the state of the bus. class _StateEnum(object): @@ -79,7 +112,7 @@ class _StateEnum(object): name = None def __repr__(self): return "states.%s" % self.name - + def __setattr__(self, key, value): if isinstance(value, self.State): value.name = key @@ -92,68 +125,86 @@ states.STOPPING = states.State() states.EXITING = states.State() +try: + import fcntl +except ImportError: + max_files = 0 +else: + try: + max_files = os.sysconf('SC_OPEN_MAX') + except AttributeError: + max_files = 1024 + + class Bus(object): """Process state-machine and messenger for HTTP site deployment. - + All listeners for a given channel are guaranteed to be called even if others at the same channel fail. Each failure is logged, but execution proceeds on to the next listener. The only way to stop all processing from inside a listener is to raise SystemExit and stop the whole server. """ - + states = states state = states.STOPPED execv = False - + max_cloexec_files = max_files + def __init__(self): self.execv = False self.state = states.STOPPED self.listeners = dict( [(channel, set()) for channel - in ('start', 'stop', 'exit', 'graceful', 'log')]) + in ('start', 'stop', 'exit', 'graceful', 'log', 'main')]) self._priorities = {} - + def subscribe(self, channel, callback, priority=None): """Add the given callback at the given channel (if not present).""" if channel not in self.listeners: self.listeners[channel] = set() self.listeners[channel].add(callback) - + if priority is None: priority = getattr(callback, 'priority', 50) self._priorities[(channel, callback)] = priority - + def unsubscribe(self, channel, callback): """Discard the given callback (if present).""" listeners = self.listeners.get(channel) if listeners and callback in listeners: listeners.discard(callback) del self._priorities[(channel, callback)] - + def publish(self, channel, *args, **kwargs): """Return output of all subscribers for the given channel.""" if channel not in self.listeners: return [] - - exc = None + + exc = ChannelFailures() output = [] - + items = [(self._priorities[(channel, listener)], listener) for listener in self.listeners[channel]] - items.sort() + try: + items.sort(key=lambda item: item[0]) + except TypeError: + # Python 2.3 had no 'key' arg, but that doesn't matter + # since it could sort dissimilar types just fine. 
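# [Editor's aside -- not patch content.] The items are (priority, listener)
# pairs; sorting on the whole tuple would compare the listener callables
# whenever two priorities tie, which Python 3 rejects. Sorting on the
# priority alone sidesteps that:
items = [(50, max), (10, len), (50, min)]
items.sort(key=lambda item: item[0])
print([priority for priority, listener in items])   # [10, 50, 50]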
+ items.sort() for priority, listener in items: try: output.append(listener(*args, **kwargs)) except KeyboardInterrupt: raise - except SystemExit, e: + except SystemExit: + e = sys.exc_info()[1] # If we have previous errors ensure the exit code is non-zero if exc and e.code == 0: e.code = 1 raise except: - exc = sys.exc_info()[1] + exc.handle_exception() if channel == 'log': # Assume any further messages to 'log' will fail. pass @@ -161,9 +212,9 @@ class Bus(object): self.log("Error in %r listener %r" % (channel, listener), level=40, traceback=True) if exc: - raise + raise exc return output - + def _clean_exit(self): """An atexit handler which asserts the Bus is not running.""" if self.state != states.EXITING: @@ -173,11 +224,11 @@ class Bus(object): "bus.block() after start(), or call bus.exit() before the " "main thread exits." % self.state, RuntimeWarning) self.exit() - + def start(self): """Start all services.""" atexit.register(self._clean_exit) - + self.state = states.STARTING self.log('Bus STARTING') try: @@ -189,19 +240,21 @@ class Bus(object): except: self.log("Shutting down due to error in start listener:", level=40, traceback=True) - e_info = sys.exc_info() + e_info = sys.exc_info()[1] try: self.exit() except: # Any stop/exit errors will be logged inside publish(). pass - raise e_info[0], e_info[1], e_info[2] - + # Re-raise the original error + raise e_info + def exit(self): """Stop all services and prepare to exit the process.""" + exitstate = self.state try: self.stop() - + self.state = states.EXITING self.log('Bus EXITING') self.publish('exit') @@ -214,24 +267,35 @@ class Bus(object): # can't just let exceptions propagate out unhandled. # Assume it's been logged and just die. os._exit(70) # EX_SOFTWARE - + + # Changed by Kovid, we cannot have all of calibre being quit + # Also we want to catch the port blocked/busy error and try listening only on + # the external ip + # See https://bitbucket.org/cherrypy/cherrypy/issue/1017/exit-behavior-is-not-good-when-running-in + if False and exitstate == states.STARTING: + # exit() was called before start() finished, possibly due to + # Ctrl-C because a start listener got stuck. In this case, + # we could get stuck in a loop where Ctrl-C never exits the + # process, so we just call os.exit here. + os._exit(70) # EX_SOFTWARE + def restart(self): """Restart the process (may close connections). - + This method does not restart the process from the calling thread; instead, it stops the bus and asks the main thread to call execv. """ self.execv = True self.exit() - + def graceful(self): """Advise all services to reload.""" self.log('Bus graceful') self.publish('graceful') - + def block(self, interval=0.1): """Wait for the EXITING state, KeyboardInterrupt or SystemExit. - + This function is intended to be called only by the main thread. After waiting for the EXITING state, it also waits for all threads to terminate, and then calls os.execv if self.execv is True. This @@ -239,7 +303,7 @@ class Bus(object): thread perform the actual execv call (required on some platforms). """ try: - self.wait(states.EXITING, interval=interval) + self.wait(states.EXITING, interval=interval, channel='main') except (KeyboardInterrupt, IOError): # The time.sleep call might raise # "IOError: [Errno 4] Interrupted function call" on KBInt. @@ -249,7 +313,7 @@ class Bus(object): self.log('SystemExit raised: shutting down bus') self.exit() raise - + # Waiting for ALL child threads to finish is necessary on OS X. # See http://www.cherrypy.org/ticket/581. 
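# [Editor's aside -- sketch of the shutdown wait described here; not patch
# content.] Join every non-daemonic worker thread, assuming this is called
# from the main thread:
import threading

def join_workers_sketch():
    for t in threading.enumerate():
        if t is not threading.current_thread() and not t.daemon:
            t.join()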
# It's also good to let them all shut down before allowing @@ -265,22 +329,24 @@ class Bus(object): else: d = t.isDaemon() if not d: + self.log("Waiting for thread %s." % t.getName()) t.join() - + if self.execv: self._do_execv() - - def wait(self, state, interval=0.1): - """Wait for the given state(s).""" + + def wait(self, state, interval=0.1, channel=None): + """Poll for the given state(s) at intervals; publish to channel.""" if isinstance(state, (tuple, list)): states = state else: states = [state] - + def _wait(): while self.state not in states: time.sleep(interval) - + self.publish(channel) + # From http://psyco.sourceforge.net/psycoguide/bugs.html: # "The compiled machine code does not include the regular polling # done by Python, meaning that a KeyboardInterrupt will not be @@ -291,23 +357,49 @@ class Bus(object): sys.modules['psyco'].cannotcompile(_wait) except (KeyError, AttributeError): pass - + _wait() - + def _do_execv(self): """Re-execute the current process. - + This must be called from the main thread, because certain platforms (OS X) don't allow execv to be called in a child thread very well. """ args = sys.argv[:] self.log('Re-spawning %s' % ' '.join(args)) - args.insert(0, sys.executable) - if sys.platform == 'win32': - args = ['"%s"' % arg for arg in args] - - os.execv(sys.executable, args) - + + if sys.platform[:4] == 'java': + from _systemrestart import SystemRestart + raise SystemRestart + else: + args.insert(0, sys.executable) + if sys.platform == 'win32': + args = ['"%s"' % arg for arg in args] + + os.chdir(_startup_cwd) + if self.max_cloexec_files: + self._set_cloexec() + os.execv(sys.executable, args) + + def _set_cloexec(self): + """Set the CLOEXEC flag on all open files (except stdin/out/err). + + If self.max_cloexec_files is an integer (the default), then on + platforms which support it, it represents the max open files setting + for the operating system. This function will be called just before + the process is restarted via os.execv() to prevent open files + from persisting into the new process. + + Set self.max_cloexec_files to 0 to disable this behavior. + """ + for fd in range(3, self.max_cloexec_files): # skip stdin/out/err + try: + flags = fcntl.fcntl(fd, fcntl.F_GETFD) + except IOError: + continue + fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) + def stop(self): """Stop all services.""" self.state = states.STOPPING @@ -315,7 +407,7 @@ class Bus(object): self.publish('stop') self.state = states.STOPPED self.log('Bus STOPPED') - + def start_with_callback(self, func, args=None, kwargs=None): """Start 'func' in a new thread T, then start self (and return T).""" if args is None: @@ -323,23 +415,22 @@ class Bus(object): if kwargs is None: kwargs = {} args = (func,) + args - + def _callback(func, *a, **kw): self.wait(states.STARTED) func(*a, **kw) t = threading.Thread(target=_callback, args=args, kwargs=kwargs) t.setName('Bus Callback ' + t.getName()) t.start() - + self.start() - + return t - + def log(self, msg="", level=20, traceback=False): """Log the given message. 
Append the last traceback if requested.""" if traceback: - exc = sys.exc_info() - msg += "\n" + "".join(_traceback.format_exception(*exc)) + msg += "\n" + "".join(_traceback.format_exception(*sys.exc_info())) self.publish('log', msg, level) bus = Bus() diff --git a/src/cherrypy/scaffold/__init__.py b/src/cherrypy/scaffold/__init__.py index f50cc213d3..00964ac5f6 100644 --- a/src/cherrypy/scaffold/__init__.py +++ b/src/cherrypy/scaffold/__init__.py @@ -8,7 +8,7 @@ then tweak as desired. Even before any tweaking, this should serve a few demonstration pages. Change to this directory and run: - python cherrypy\cherryd -c cherrypy\scaffold\site.conf + ../cherryd -c site.conf """ diff --git a/src/cherrypy/scaffold/apache-fcgi.conf b/src/cherrypy/scaffold/apache-fcgi.conf new file mode 100644 index 0000000000..922398eaf8 --- /dev/null +++ b/src/cherrypy/scaffold/apache-fcgi.conf @@ -0,0 +1,22 @@ +# Apache2 server conf file for using CherryPy with mod_fcgid. + +# This doesn't have to be "C:/", but it has to be a directory somewhere, and +# MUST match the directory used in the FastCgiExternalServer directive, below. +DocumentRoot "C:/" + +ServerName 127.0.0.1 +Listen 80 +LoadModule fastcgi_module modules/mod_fastcgi.dll +LoadModule rewrite_module modules/mod_rewrite.so + +Options ExecCGI +SetHandler fastcgi-script +RewriteEngine On +# Send requests for any URI to our fastcgi handler. +RewriteRule ^(.*)$ /fastcgi.pyc [L] + +# The FastCgiExternalServer directive defines filename as an external FastCGI application. +# If filename does not begin with a slash (/) then it is assumed to be relative to the ServerRoot. +# The filename does not have to exist in the local filesystem. URIs that Apache resolves to this +# filename will be handled by this external FastCGI application. +FastCgiExternalServer "C:/fastcgi.pyc" -host 127.0.0.1:8088 \ No newline at end of file diff --git a/src/cherrypy/scaffold/site.conf b/src/cherrypy/scaffold/site.conf index 6fc8f4ec34..6ed3898373 100644 --- a/src/cherrypy/scaffold/site.conf +++ b/src/cherrypy/scaffold/site.conf @@ -5,4 +5,10 @@ server.socket_host: "0.0.0.0" server.socket_port: 8088 -tree.myapp: cherrypy.Application(scaffold.root, "/", "cherrypy/scaffold/example.conf") +# Uncomment the following lines to run on HTTPS at the same time +#server.2.socket_host: "0.0.0.0" +#server.2.socket_port: 8433 +#server.2.ssl_certificate: '../test/test.pem' +#server.2.ssl_private_key: '../test/test.pem' + +tree.myapp: cherrypy.Application(scaffold.root, "/", "example.conf") diff --git a/src/cherrypy/wsgiserver/__init__.py b/src/cherrypy/wsgiserver/__init__.py index c380e18b05..ee6190fee1 100644 --- a/src/cherrypy/wsgiserver/__init__.py +++ b/src/cherrypy/wsgiserver/__init__.py @@ -1,1794 +1,14 @@ -"""A high-speed, production ready, thread pooled, generic WSGI server. 
- -Simplest example on how to use this module directly -(without using CherryPy's application machinery): - - from cherrypy import wsgiserver - - def my_crazy_app(environ, start_response): - status = '200 OK' - response_headers = [('Content-type','text/plain')] - start_response(status, response_headers) - return ['Hello world!\n'] - - server = wsgiserver.CherryPyWSGIServer( - ('0.0.0.0', 8070), my_crazy_app, - server_name='www.cherrypy.example') - -The CherryPy WSGI server can serve as many WSGI applications -as you want in one instance by using a WSGIPathInfoDispatcher: - - d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) - server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) - -Want SSL support? Just set these attributes: - - server.ssl_certificate = - server.ssl_private_key = - - if __name__ == '__main__': - try: - server.start() - except KeyboardInterrupt: - server.stop() - -This won't call the CherryPy engine (application side) at all, only the -WSGI server, which is independant from the rest of CherryPy. Don't -let the name "CherryPyWSGIServer" throw you; the name merely reflects -its origin, not its coupling. - -For those of you wanting to understand internals of this module, here's the -basic call flow. The server's listening thread runs a very tight loop, -sticking incoming connections onto a Queue: - - server = CherryPyWSGIServer(...) - server.start() - while True: - tick() - # This blocks until a request comes in: - child = socket.accept() - conn = HTTPConnection(child, ...) - server.requests.put(conn) - -Worker threads are kept in a pool and poll the Queue, popping off and then -handling each connection in turn. Each connection can consist of an arbitrary -number of requests and their responses, so we run a nested loop: - - while True: - conn = server.requests.get() - conn.communicate() - -> while True: - req = HTTPRequest(...) - req.parse_request() - -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" - req.rfile.readline() - req.read_headers() - req.respond() - -> response = wsgi_app(...) - try: - for chunk in response: - if chunk: - req.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - if req.close_connection: - return -""" - - -import base64 -import os -import Queue -import re -quoted_slash = re.compile("(?i)%2F") -import rfc822 -import socket -try: - import cStringIO as StringIO -except ImportError: - import StringIO - -_fileobject_uses_str_type = isinstance(socket._fileobject(None)._rbuf, basestring) +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] import sys -import threading -import time -import traceback -from urllib import unquote -from urlparse import urlparse -import warnings - -try: - from OpenSSL import SSL - from OpenSSL import crypto -except ImportError: - SSL = None - -import errno - -def plat_specific_errors(*errnames): - """Return error numbers for all errors in errnames on this platform. - - The 'errno' module contains different global constants depending on - the specific platform (OS). This function will return the list of - numeric values for a given list of potential names. 
- """ - errno_names = dir(errno) - nums = [getattr(errno, k) for k in errnames if k in errno_names] - # de-dupe the list - return dict.fromkeys(nums).keys() - -socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") - -socket_errors_to_ignore = plat_specific_errors( - "EPIPE", - "EBADF", "WSAEBADF", - "ENOTSOCK", "WSAENOTSOCK", - "ETIMEDOUT", "WSAETIMEDOUT", - "ECONNREFUSED", "WSAECONNREFUSED", - "ECONNRESET", "WSAECONNRESET", - "ECONNABORTED", "WSAECONNABORTED", - "ENETRESET", "WSAENETRESET", - "EHOSTDOWN", "EHOSTUNREACH", - ) -socket_errors_to_ignore.append("timed out") - -socket_errors_nonblocking = plat_specific_errors( - 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') - -comma_separated_headers = ['ACCEPT', 'ACCEPT-CHARSET', 'ACCEPT-ENCODING', - 'ACCEPT-LANGUAGE', 'ACCEPT-RANGES', 'ALLOW', 'CACHE-CONTROL', - 'CONNECTION', 'CONTENT-ENCODING', 'CONTENT-LANGUAGE', 'EXPECT', - 'IF-MATCH', 'IF-NONE-MATCH', 'PRAGMA', 'PROXY-AUTHENTICATE', 'TE', - 'TRAILER', 'TRANSFER-ENCODING', 'UPGRADE', 'VARY', 'VIA', 'WARNING', - 'WWW-AUTHENTICATE'] - - -class WSGIPathInfoDispatcher(object): - """A WSGI dispatcher for dispatch based on the PATH_INFO. - - apps: a dict or list of (path_prefix, app) pairs. - """ - - def __init__(self, apps): - try: - apps = apps.items() - except AttributeError: - pass - - # Sort the apps by len(path), descending - apps.sort() - apps.reverse() - - # The path_prefix strings must start, but not end, with a slash. - # Use "" instead of "/". - self.apps = [(p.rstrip("/"), a) for p, a in apps] - - def __call__(self, environ, start_response): - path = environ["PATH_INFO"] or "/" - for p, app in self.apps: - # The apps list should be sorted by length, descending. - if path.startswith(p + "/") or path == p: - environ = environ.copy() - environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p - environ["PATH_INFO"] = path[len(p):] - return app(environ, start_response) - - start_response('404 Not Found', [('Content-Type', 'text/plain'), - ('Content-Length', '0')]) - return [''] - - -class MaxSizeExceeded(Exception): - pass - -class SizeCheckWrapper(object): - """Wraps a file-like object, raising MaxSizeExceeded if too large.""" - - def __init__(self, rfile, maxlen): - self.rfile = rfile - self.maxlen = maxlen - self.bytes_read = 0 - - def _check_length(self): - if self.maxlen and self.bytes_read > self.maxlen: - raise MaxSizeExceeded() - - def read(self, size=None): - data = self.rfile.read(size) - self.bytes_read += len(data) - self._check_length() - return data - - def readline(self, size=None): - if size is not None: - data = self.rfile.readline(size) - self.bytes_read += len(data) - self._check_length() - return data - - # User didn't specify a size ... - # We read the line in chunks to make sure it's not a 100MB line ! - res = [] - while True: - data = self.rfile.readline(256) - self.bytes_read += len(data) - self._check_length() - res.append(data) - # See http://www.cherrypy.org/ticket/421 - if len(data) < 256 or data[-1:] == "\n": - return ''.join(res) - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def next(self): - data = self.rfile.next() - self.bytes_read += len(data) - self._check_length() - return data - - -class HTTPRequest(object): - """An HTTP Request (and response). 
- - A single HTTP connection may consist of multiple request/response pairs. - - send: the 'send' method from the connection's socket object. - wsgi_app: the WSGI application to call. - environ: a partial WSGI environ (server and connection entries). - The caller MUST set the following entries: - * All wsgi.* entries, including .input - * SERVER_NAME and SERVER_PORT - * Any SSL_* entries - * Any custom entries like REMOTE_ADDR and REMOTE_PORT - * SERVER_SOFTWARE: the value to write in the "Server" response header. - * ACTUAL_SERVER_PROTOCOL: the value to write in the Status-Line of - the response. From RFC 2145: "An HTTP server SHOULD send a - response version equal to the highest version for which the - server is at least conditionally compliant, and whose major - version is less than or equal to the one received in the - request. An HTTP server MUST NOT send a version for which - it is not at least conditionally compliant." - - outheaders: a list of header tuples to write in the response. - ready: when True, the request has been parsed and is ready to begin - generating the response. When False, signals the calling Connection - that the response should not be generated and the connection should - close. - close_connection: signals the calling Connection that the request - should close. This does not imply an error! The client and/or - server may each request that the connection be closed. - chunked_write: if True, output will be encoded with the "chunked" - transfer-coding. This value is set automatically inside - send_headers. - """ - - max_request_header_size = 0 - max_request_body_size = 0 - - def __init__(self, wfile, environ, wsgi_app): - self.rfile = environ['wsgi.input'] - self.wfile = wfile - self.environ = environ.copy() - self.wsgi_app = wsgi_app - - self.ready = False - self.started_response = False - self.status = "" - self.outheaders = [] - self.sent_headers = False - self.close_connection = False - self.chunked_write = False - - def parse_request(self): - """Parse the next HTTP request start-line and message-headers.""" - self.rfile.maxlen = self.max_request_header_size - self.rfile.bytes_read = 0 - - try: - self._parse_request() - except MaxSizeExceeded: - self.simple_response("413 Request Entity Too Large") - return - - def _parse_request(self): - # HTTP/1.1 connections are persistent by default. If a client - # requests a page, then idles (leaves the connection open), - # then rfile.readline() will raise socket.error("timed out"). - # Note that it does this based on the value given to settimeout(), - # and doesn't need the client to request or acknowledge the close - # (although your TCP stack might suffer for it: cf Apache's history - # with FIN_WAIT_2). - request_line = self.rfile.readline() - if not request_line: - # Force self.ready = False so the connection will close. - self.ready = False - return - - if request_line == "\r\n": - # RFC 2616 sec 4.1: "...if the server is reading the protocol - # stream at the beginning of a message and receives a CRLF - # first, it should ignore the CRLF." - # But only ignore one leading line! else we enable a DoS. 
- request_line = self.rfile.readline() - if not request_line: - self.ready = False - return - - environ = self.environ - - try: - method, path, req_protocol = request_line.strip().split(" ", 2) - except ValueError: - self.simple_response(400, "Malformed Request-Line") - return - - environ["REQUEST_METHOD"] = method - - # path may be an abs_path (including "http://host.domain.tld"); - scheme, location, path, params, qs, frag = urlparse(path) - - if frag: - self.simple_response("400 Bad Request", - "Illegal #fragment in Request-URI.") - return - - if scheme: - environ["wsgi.url_scheme"] = scheme - if params: - path = path + ";" + params - - environ["SCRIPT_NAME"] = "" - - # Unquote the path+params (e.g. "/this%20path" -> "this path"). - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 - # - # But note that "...a URI must be separated into its components - # before the escaped characters within those components can be - # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 - atoms = [unquote(x) for x in quoted_slash.split(path)] - path = "%2F".join(atoms) - environ["PATH_INFO"] = path - - # Note that, like wsgiref and most other WSGI servers, - # we unquote the path but not the query string. - environ["QUERY_STRING"] = qs - - # Compare request and server HTTP protocol versions, in case our - # server does not support the requested protocol. Limit our output - # to min(req, server). We want the following output: - # request server actual written supported response - # protocol protocol response protocol feature set - # a 1.0 1.0 1.0 1.0 - # b 1.0 1.1 1.1 1.0 - # c 1.1 1.0 1.0 1.0 - # d 1.1 1.1 1.1 1.1 - # Notice that, in (b), the response will be "HTTP/1.1" even though - # the client only understands 1.0. RFC 2616 10.5.6 says we should - # only return 505 if the _major_ version is different. - rp = int(req_protocol[5]), int(req_protocol[7]) - server_protocol = environ["ACTUAL_SERVER_PROTOCOL"] - sp = int(server_protocol[5]), int(server_protocol[7]) - if sp[0] != rp[0]: - self.simple_response("505 HTTP Version Not Supported") - return - # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. - environ["SERVER_PROTOCOL"] = req_protocol - self.response_protocol = "HTTP/%s.%s" % min(rp, sp) - - # If the Request-URI was an absoluteURI, use its location atom. - if location: - environ["SERVER_NAME"] = location - - # then all the http headers - try: - self.read_headers() - except ValueError, ex: - self.simple_response("400 Bad Request", repr(ex.args)) - return - - mrbs = self.max_request_body_size - if mrbs and int(environ.get("CONTENT_LENGTH", 0)) > mrbs: - self.simple_response("413 Request Entity Too Large") - return - - # Persistent connection support - if self.response_protocol == "HTTP/1.1": - # Both server and client are HTTP/1.1 - if environ.get("HTTP_CONNECTION", "") == "close": - self.close_connection = True - else: - # Either the server or client (or both) are HTTP/1.0 - if environ.get("HTTP_CONNECTION", "") != "Keep-Alive": - self.close_connection = True - - # Transfer-Encoding support - te = None - if self.response_protocol == "HTTP/1.1": - te = environ.get("HTTP_TRANSFER_ENCODING") - if te: - te = [x.strip().lower() for x in te.split(",") if x.strip()] - - self.chunked_read = False - - if te: - for enc in te: - if enc == "chunked": - self.chunked_read = True - else: - # Note that, even if we see "chunked", we must reject - # if there is an extension we don't recognize. 
- self.simple_response("501 Unimplemented") - self.close_connection = True - return - - # From PEP 333: - # "Servers and gateways that implement HTTP 1.1 must provide - # transparent support for HTTP 1.1's "expect/continue" mechanism. - # This may be done in any of several ways: - # 1. Respond to requests containing an Expect: 100-continue request - # with an immediate "100 Continue" response, and proceed normally. - # 2. Proceed with the request normally, but provide the application - # with a wsgi.input stream that will send the "100 Continue" - # response if/when the application first attempts to read from - # the input stream. The read request must then remain blocked - # until the client responds. - # 3. Wait until the client decides that the server does not support - # expect/continue, and sends the request body on its own. - # (This is suboptimal, and is not recommended.) - # - # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, - # but it seems like it would be a big slowdown for such a rare case. - if environ.get("HTTP_EXPECT", "") == "100-continue": - self.simple_response(100) - - self.ready = True - - def read_headers(self): - """Read header lines from the incoming stream.""" - environ = self.environ - - while True: - line = self.rfile.readline() - if not line: - # No more data--illegal end of headers - raise ValueError("Illegal end of headers.") - - if line == '\r\n': - # Normal end of headers - break - - if line[0] in ' \t': - # It's a continuation line. - v = line.strip() - else: - k, v = line.split(":", 1) - k, v = k.strip().upper(), v.strip() - envname = "HTTP_" + k.replace("-", "_") - - if k in comma_separated_headers: - existing = environ.get(envname) - if existing: - v = ", ".join((existing, v)) - environ[envname] = v - - ct = environ.pop("HTTP_CONTENT_TYPE", None) - if ct is not None: - environ["CONTENT_TYPE"] = ct - cl = environ.pop("HTTP_CONTENT_LENGTH", None) - if cl is not None: - environ["CONTENT_LENGTH"] = cl - - def decode_chunked(self): - """Decode the 'chunked' transfer coding.""" - cl = 0 - data = StringIO.StringIO() - while True: - line = self.rfile.readline().strip().split(";", 1) - chunk_size = int(line.pop(0), 16) - if chunk_size <= 0: - break -## if line: chunk_extension = line[0] - cl += chunk_size - data.write(self.rfile.read(chunk_size)) - crlf = self.rfile.read(2) - if crlf != "\r\n": - self.simple_response("400 Bad Request", - "Bad chunked transfer coding " - "(expected '\\r\\n', got %r)" % crlf) - return - - # Grab any trailer headers - self.read_headers() - - data.seek(0) - self.environ["wsgi.input"] = data - self.environ["CONTENT_LENGTH"] = str(cl) or "" - return True - - def respond(self): - """Call the appropriate WSGI app and write its iterable output.""" - # Set rfile.maxlen to ensure we don't read past Content-Length. - # This will also be used to read the entire request body if errors - # are raised before the app can read the body. - if self.chunked_read: - # If chunked, Content-Length will be 0. 
- self.rfile.maxlen = self.max_request_body_size - else: - cl = int(self.environ.get("CONTENT_LENGTH", 0)) - if self.max_request_body_size: - self.rfile.maxlen = min(cl, self.max_request_body_size) - else: - self.rfile.maxlen = cl - self.rfile.bytes_read = 0 - - try: - self._respond() - except MaxSizeExceeded: - if not self.sent_headers: - self.simple_response("413 Request Entity Too Large") - return - - def _respond(self): - if self.chunked_read: - if not self.decode_chunked(): - self.close_connection = True - return - - response = self.wsgi_app(self.environ, self.start_response) - try: - for chunk in response: - # "The start_response callable must not actually transmit - # the response headers. Instead, it must store them for the - # server or gateway to transmit only after the first - # iteration of the application return value that yields - # a NON-EMPTY string, or upon the application's first - # invocation of the write() callable." (PEP 333) - if chunk: - self.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - - if (self.ready and not self.sent_headers): - self.sent_headers = True - self.send_headers() - if self.chunked_write: - self.wfile.sendall("0\r\n\r\n") - - def simple_response(self, status, msg=""): - """Write a simple response back to the client.""" - status = str(status) - buf = ["%s %s\r\n" % (self.environ['ACTUAL_SERVER_PROTOCOL'], status), - "Content-Length: %s\r\n" % len(msg), - "Content-Type: text/plain\r\n"] - - if status[:3] == "413" and self.response_protocol == 'HTTP/1.1': - # Request Entity Too Large - self.close_connection = True - buf.append("Connection: close\r\n") - - buf.append("\r\n") - if msg: - buf.append(msg) - - try: - self.wfile.sendall("".join(buf)) - except socket.error, x: - if x.args[0] not in socket_errors_to_ignore: - raise - - def start_response(self, status, headers, exc_info = None): - """WSGI callable to begin the HTTP response.""" - # "The application may call start_response more than once, - # if and only if the exc_info argument is provided." - if self.started_response and not exc_info: - raise AssertionError("WSGI start_response called a second " - "time with no exc_info.") - - # "if exc_info is provided, and the HTTP headers have already been - # sent, start_response must raise an error, and should raise the - # exc_info tuple." - if self.sent_headers: - try: - raise exc_info[0], exc_info[1], exc_info[2] - finally: - exc_info = None - - self.started_response = True - self.status = status - self.outheaders.extend(headers) - return self.write - - def write(self, chunk): - """WSGI callable to write unbuffered data to the client. - - This method is also used internally by start_response (to write - data from the iterable returned by the WSGI application). - """ - if not self.started_response: - raise AssertionError("WSGI write called before start_response.") - - if not self.sent_headers: - self.sent_headers = True - self.send_headers() - - if self.chunked_write and chunk: - buf = [hex(len(chunk))[2:], "\r\n", chunk, "\r\n"] - self.wfile.sendall("".join(buf)) - else: - self.wfile.sendall(chunk) - - def send_headers(self): - """Assert, process, and send the HTTP response message-headers.""" - hkeys = [key.lower() for key, value in self.outheaders] - status = int(self.status[:3]) - - if status == 413: - # Request Entity Too Large. Close conn to avoid garbage. 
- self.close_connection = True - elif "content-length" not in hkeys: - # "All 1xx (informational), 204 (no content), - # and 304 (not modified) responses MUST NOT - # include a message-body." So no point chunking. - if status < 200 or status in (204, 205, 304): - pass - else: - if (self.response_protocol == 'HTTP/1.1' - and self.environ["REQUEST_METHOD"] != 'HEAD'): - # Use the chunked transfer-coding - self.chunked_write = True - self.outheaders.append(("Transfer-Encoding", "chunked")) - else: - # Closing the conn is the only way to determine len. - self.close_connection = True - - if "connection" not in hkeys: - if self.response_protocol == 'HTTP/1.1': - # Both server and client are HTTP/1.1 or better - if self.close_connection: - self.outheaders.append(("Connection", "close")) - else: - # Server and/or client are HTTP/1.0 - if not self.close_connection: - self.outheaders.append(("Connection", "Keep-Alive")) - - if (not self.close_connection) and (not self.chunked_read): - # Read any remaining request body data on the socket. - # "If an origin server receives a request that does not include an - # Expect request-header field with the "100-continue" expectation, - # the request includes a request body, and the server responds - # with a final status code before reading the entire request body - # from the transport connection, then the server SHOULD NOT close - # the transport connection until it has read the entire request, - # or until the client closes the connection. Otherwise, the client - # might not reliably receive the response message. However, this - # requirement is not be construed as preventing a server from - # defending itself against denial-of-service attacks, or from - # badly broken client implementations." - size = self.rfile.maxlen - self.rfile.bytes_read - if size > 0: - self.rfile.read(size) - - if "date" not in hkeys: - self.outheaders.append(("Date", rfc822.formatdate())) - - if "server" not in hkeys: - self.outheaders.append(("Server", self.environ['SERVER_SOFTWARE'])) - - buf = [self.environ['ACTUAL_SERVER_PROTOCOL'], " ", self.status, "\r\n"] - try: - buf += [k + ": " + v + "\r\n" for k, v in self.outheaders] - except TypeError: - if not isinstance(k, str): - raise TypeError("WSGI response header key %r is not a string.") - if not isinstance(v, str): - raise TypeError("WSGI response header value %r is not a string.") - else: - raise - buf.append("\r\n") - self.wfile.sendall("".join(buf)) - - -class NoSSLError(Exception): - """Exception raised when a client speaks HTTP to an HTTPS socket.""" - pass - - -class FatalSSLAlert(Exception): - """Exception raised when the SSL implementation signals a fatal alert.""" - pass - - -if not _fileobject_uses_str_type: - class CP_fileobject(socket._fileobject): - """Faux file object attached to a socket object.""" - - def sendall(self, data): - """Sendall for non-blocking sockets.""" - while data: - try: - bytes_sent = self.send(data) - data = data[bytes_sent:] - except socket.error, e: - if e.args[0] not in socket_errors_nonblocking: - raise - - def send(self, data): - return self._sock.send(data) - - def flush(self): - if self._wbuf: - buffer = "".join(self._wbuf) - self._wbuf = [] - self.sendall(buffer) - - def recv(self, size): - while True: - try: - return self._sock.recv(size) - except socket.error, e: - if (e.args[0] not in socket_errors_nonblocking - and e.args[0] not in socket_error_eintr): - raise - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. 
- # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(rbufsize) - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - data = self.recv(left) - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self.recv - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(self._rbufsize) - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(self._rbufsize) - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. 
Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - +if sys.version_info < (3, 0): + from wsgiserver2 import * else: - class CP_fileobject(socket._fileobject): - """Faux file object attached to a socket object.""" - - def sendall(self, data): - """Sendall for non-blocking sockets.""" - while data: - try: - bytes_sent = self.send(data) - data = data[bytes_sent:] - except socket.error, e: - if e.args[0] not in socket_errors_nonblocking: - raise - - def send(self, data): - return self._sock.send(data) - - def flush(self): - if self._wbuf: - buffer = "".join(self._wbuf) - self._wbuf = [] - self.sendall(buffer) - - def recv(self, size): - while True: - try: - return self._sock.recv(size) - except socket.error, e: - if (e.args[0] not in socket_errors_nonblocking - and e.args[0] not in socket_error_eintr): - raise - - def read(self, size=-1): - if size < 0: - # Read until EOF - buffers = [self._rbuf] - self._rbuf = "" - if self._rbufsize <= 1: - recv_size = self.default_bufsize - else: - recv_size = self._rbufsize - - while True: - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - return "".join(buffers) - else: - # Read until size bytes or EOF seen, whichever comes first - data = self._rbuf - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - left = size - buf_len - recv_size = max(self._rbufsize, left) - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - def readline(self, size=-1): - data = self._rbuf - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - assert data == "" - buffers = [] - while data != "\n": - data = self.recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - return "".join(buffers) - else: - # Read until size bytes or \n or EOF seen, whichever comes first - nl = data.find('\n', 0, size) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - left = size - buf_len - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - -class SSL_fileobject(CP_fileobject): - """SSL file object attached to a socket 
object.""" - - ssl_timeout = 3 - ssl_retry = .01 - - def _safe_call(self, is_reader, call, *args, **kwargs): - """Wrap the given call with SSL error-trapping. - - is_reader: if False EOF errors will be raised. If True, EOF errors - will return "" (to emulate normal sockets). - """ - start = time.time() - while True: - try: - return call(*args, **kwargs) - except SSL.WantReadError: - # Sleep and try again. This is dangerous, because it means - # the rest of the stack has no way of differentiating - # between a "new handshake" error and "client dropped". - # Note this isn't an endless loop: there's a timeout below. - time.sleep(self.ssl_retry) - except SSL.WantWriteError: - time.sleep(self.ssl_retry) - except SSL.SysCallError, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - errnum = e.args[0] - if is_reader and errnum in socket_errors_to_ignore: - return "" - raise socket.error(errnum) - except SSL.Error, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - thirdarg = None - try: - thirdarg = e.args[0][0][2] - except IndexError: - pass - - if thirdarg == 'http request': - # The client is talking HTTP to an HTTPS server. - raise NoSSLError() - raise FatalSSLAlert(*e.args) - except: - raise - - if time.time() - start > self.ssl_timeout: - raise socket.timeout("timed out") - - def recv(self, *args, **kwargs): - buf = [] - r = super(SSL_fileobject, self).recv - while True: - data = self._safe_call(True, r, *args, **kwargs) - buf.append(data) - p = self._sock.pending() - if not p: - return "".join(buf) - - def sendall(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).sendall, *args, **kwargs) - - def send(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).send, *args, **kwargs) - - -class HTTPConnection(object): - """An HTTP connection (active socket). - - socket: the raw socket object (usually TCP) for this connection. - wsgi_app: the WSGI application for this server/connection. - environ: a WSGI environ template. This will be copied for each request. - - rfile: a fileobject for reading from the socket. - send: a function for writing (+ flush) to the socket. - """ - - rbufsize = -1 - RequestHandlerClass = HTTPRequest - environ = {"wsgi.version": (1, 0), - "wsgi.url_scheme": "http", - "wsgi.multithread": True, - "wsgi.multiprocess": False, - "wsgi.run_once": False, - "wsgi.errors": sys.stderr, - } - - def __init__(self, sock, wsgi_app, environ): - self.socket = sock - self.wsgi_app = wsgi_app - - # Copy the class environ into self. - self.environ = self.environ.copy() - self.environ.update(environ) - - if SSL and isinstance(sock, SSL.ConnectionType): - timeout = sock.gettimeout() - self.rfile = SSL_fileobject(sock, "rb", self.rbufsize) - self.rfile.ssl_timeout = timeout - self.wfile = SSL_fileobject(sock, "wb", -1) - self.wfile.ssl_timeout = timeout - else: - self.rfile = CP_fileobject(sock, "rb", self.rbufsize) - self.wfile = CP_fileobject(sock, "wb", -1) - - # Wrap wsgi.input but not HTTPConnection.rfile itself. - # We're also not setting maxlen yet; we'll do that separately - # for headers and body for each iteration of self.communicate - # (if maxlen is 0 the wrapper doesn't check length). 
- self.environ["wsgi.input"] = SizeCheckWrapper(self.rfile, 0) - - def communicate(self): - """Read each request and respond appropriately.""" - try: - while True: - # (re)set req to None so that if something goes wrong in - # the RequestHandlerClass constructor, the error doesn't - # get written to the previous request. - req = None - req = self.RequestHandlerClass(self.wfile, self.environ, - self.wsgi_app) - - # This order of operations should guarantee correct pipelining. - req.parse_request() - if not req.ready: - return - - req.respond() - if req.close_connection: - return - - except socket.error, e: - errnum = e.args[0] - if errnum == 'timed out': - if req and not req.sent_headers: - req.simple_response("408 Request Timeout") - elif errnum not in socket_errors_to_ignore: - if req and not req.sent_headers: - req.simple_response("500 Internal Server Error", - format_exc()) - return - except (KeyboardInterrupt, SystemExit): - raise - except FatalSSLAlert, e: - # Close the connection. - return - except NoSSLError: - if req and not req.sent_headers: - # Unwrap our wfile - req.wfile = CP_fileobject(self.socket._sock, "wb", -1) - req.simple_response("400 Bad Request", - "The client sent a plain HTTP request, but " - "this server only speaks HTTPS on this port.") - self.linger = True - except Exception, e: - if req and not req.sent_headers: - req.simple_response("500 Internal Server Error", format_exc()) - - linger = False - - def close(self): - """Close the socket underlying this connection.""" - self.rfile.close() - - if not self.linger: - # Python's socket module does NOT call close on the kernel socket - # when you call socket.close(). We do so manually here because we - # want this server to send a FIN TCP segment immediately. Note this - # must be called *before* calling socket.close(), because the latter - # drops its reference to the kernel socket. - self.socket._sock.close() - self.socket.close() - else: - # On the other hand, sometimes we want to hang around for a bit - # to make sure the client has a chance to read our entire - # response. Skipping the close() calls here delays the FIN - # packet until the socket object is garbage-collected later. - # Someday, perhaps, we'll do the full lingering_close that - # Apache does, but not today. - pass - - -def format_exc(limit=None): - """Like print_exc() but return a string. Backport for Python 2.3.""" - try: - etype, value, tb = sys.exc_info() - return ''.join(traceback.format_exception(etype, value, tb, limit)) - finally: - etype = value = tb = None - - -_SHUTDOWNREQUEST = None - -class WorkerThread(threading.Thread): - """Thread which continuously polls a Queue for Connection objects. - - server: the HTTP Server which spawned this thread, and which owns the - Queue and is placing active connections into it. - ready: a simple flag for the calling server to know when this thread - has begun polling the Queue. - - Due to the timing issues of polling a Queue, a WorkerThread does not - check its own 'ready' flag after it has started. To stop the thread, - it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue - (one for each running WorkerThread). 
- """ - - conn = None - - def __init__(self, server): - self.ready = False - self.server = server - threading.Thread.__init__(self) - - def run(self): - try: - self.ready = True - while True: - conn = self.server.requests.get() - if conn is _SHUTDOWNREQUEST: - return - - self.conn = conn - try: - conn.communicate() - finally: - conn.close() - self.conn = None - except (KeyboardInterrupt, SystemExit), exc: - self.server.interrupt = exc - - -class ThreadPool(object): - """A Request Queue for the CherryPyWSGIServer which pools threads. - - ThreadPool objects must provide min, get(), put(obj), start() - and stop(timeout) attributes. - """ - - def __init__(self, server, min=10, max=-1): - self.server = server - self.min = min - self.max = max - self._threads = [] - self._queue = Queue.Queue() - self.get = self._queue.get - - def start(self): - """Start the pool of threads.""" - for i in xrange(self.min): - self._threads.append(WorkerThread(self.server)) - for worker in self._threads: - worker.setName("CP WSGIServer " + worker.getName()) - worker.start() - for worker in self._threads: - while not worker.ready: - time.sleep(.1) - - def _get_idle(self): - """Number of worker threads which are idle. Read-only.""" - return len([t for t in self._threads if t.conn is None]) - idle = property(_get_idle, doc=_get_idle.__doc__) - - def put(self, obj): - self._queue.put(obj) - if obj is _SHUTDOWNREQUEST: - return - - def grow(self, amount): - """Spawn new worker threads (not above self.max).""" - for i in xrange(amount): - if self.max > 0 and len(self._threads) >= self.max: - break - worker = WorkerThread(self.server) - worker.setName("CP WSGIServer " + worker.getName()) - self._threads.append(worker) - worker.start() - - def shrink(self, amount): - """Kill off worker threads (not below self.min).""" - # Grow/shrink the pool if necessary. - # Remove any dead threads from our list - for t in self._threads: - if not t.isAlive(): - self._threads.remove(t) - amount -= 1 - - if amount > 0: - for i in xrange(min(amount, len(self._threads) - self.min)): - # Put a number of shutdown requests on the queue equal - # to 'amount'. Once each of those is processed by a worker, - # that worker will terminate and be culled from our list - # in self.put. - self._queue.put(_SHUTDOWNREQUEST) - - def stop(self, timeout=5): - # Must shut down threads here so the code that calls - # this method can know when all threads are stopped. - for worker in self._threads: - self._queue.put(_SHUTDOWNREQUEST) - - # Don't join currentThread (when stop is called inside a request). - current = threading.currentThread() - while self._threads: - worker = self._threads.pop() - if worker is not current and worker.isAlive(): - try: - if timeout is None or timeout < 0: - worker.join() - else: - worker.join(timeout) - if worker.isAlive(): - # We exhausted the timeout. - # Forcibly shut down the socket. - c = worker.conn - if c and not c.rfile.closed: - if SSL and isinstance(c.socket, SSL.ConnectionType): - # pyOpenSSL.socket.shutdown takes no args - c.socket.shutdown() - else: - c.socket.shutdown(socket.SHUT_RD) - worker.join() - except (AssertionError, - # Ignore repeated Ctrl-C. - # See http://www.cherrypy.org/ticket/691. - KeyboardInterrupt), exc1: - pass - - - -class SSLConnection: - """A thread-safe wrapper for an SSL.Connection. - - *args: the arguments to create the wrapped SSL.Connection(*args). 
- """ - - def __init__(self, *args): - self._ssl_conn = SSL.Connection(*args) - self._lock = threading.RLock() - - for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', - 'renegotiate', 'bind', 'listen', 'connect', 'accept', - 'setblocking', 'fileno', 'shutdown', 'close', 'get_cipher_list', - 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', - 'makefile', 'get_app_data', 'set_app_data', 'state_string', - 'sock_shutdown', 'get_peer_certificate', 'want_read', - 'want_write', 'set_connect_state', 'set_accept_state', - 'connect_ex', 'sendall', 'settimeout'): - exec """def %s(self, *args): - self._lock.acquire() - try: - return self._ssl_conn.%s(*args) - finally: - self._lock.release() -""" % (f, f) - - -try: - import fcntl -except ImportError: - try: - from ctypes import windll, WinError - except ImportError: - def prevent_socket_inheritance(sock): - """Dummy function, since neither fcntl nor ctypes are available.""" - pass - else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (Windows).""" - if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): - raise WinError() -else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (POSIX).""" - fd = sock.fileno() - old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) - fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) - - -class CherryPyWSGIServer(object): - """An HTTP server for WSGI. - - bind_addr: The interface on which to listen for connections. - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string. - wsgi_app: the WSGI 'application callable'; multiple WSGI applications - may be passed as (path_prefix, app) pairs. - numthreads: the number of worker threads to create (default 10). - server_name: the string to set for WSGI's SERVER_NAME environ entry. - Defaults to socket.gethostname(). - max: the maximum number of queued requests (defaults to -1 = no limit). - request_queue_size: the 'backlog' argument to socket.listen(); - specifies the maximum number of queued connections (default 5). - timeout: the timeout in seconds for accepted connections (default 10). - - nodelay: if True (the default since 3.1), sets the TCP_NODELAY socket - option. - - protocol: the version string to write in the Status-Line of all - HTTP responses. For example, "HTTP/1.1" (the default). This - also limits the supported features used in the response. - - - SSL/HTTPS - --------- - The OpenSSL module must be importable for SSL functionality. - You can obtain it from http://pyopenssl.sourceforge.net/ - - ssl_certificate: the filename of the server SSL certificate. - ssl_privatekey: the filename of the server's private key file. - - If either of these is None (both are None by default), this server - will not use SSL. If both are given and are valid, they will be read - on server start and used in the SSL context for the listening socket. 
- """ - - protocol = "HTTP/1.1" - _bind_addr = "127.0.0.1" - version = "CherryPy/3.1.2" - ready = False - _interrupt = None - - nodelay = True - - ConnectionClass = HTTPConnection - environ = {} - - # Paths to certificate and private key files - ssl_certificate = None - ssl_private_key = None - - def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, - max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): - self.requests = ThreadPool(self, min=numthreads or 1, max=max) - - if callable(wsgi_app): - # We've been handed a single wsgi_app, in CP-2.1 style. - # Assume it's mounted at "". - self.wsgi_app = wsgi_app - else: - # We've been handed a list of (path_prefix, wsgi_app) tuples, - # so that the server can call different wsgi_apps, and also - # correctly set SCRIPT_NAME. - warnings.warn("The ability to pass multiple apps is deprecated " - "and will be removed in 3.2. You should explicitly " - "include a WSGIPathInfoDispatcher instead.", - DeprecationWarning) - self.wsgi_app = WSGIPathInfoDispatcher(wsgi_app) - - self.bind_addr = bind_addr - if not server_name: - server_name = socket.gethostname() - self.server_name = server_name - self.request_queue_size = request_queue_size - - self.timeout = timeout - self.shutdown_timeout = shutdown_timeout - - def _get_numthreads(self): - return self.requests.min - def _set_numthreads(self, value): - self.requests.min = value - numthreads = property(_get_numthreads, _set_numthreads) - - def __str__(self): - return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, - self.bind_addr) - - def _get_bind_addr(self): - return self._bind_addr - def _set_bind_addr(self, value): - if isinstance(value, tuple) and value[0] in ('', None): - # Despite the socket module docs, using '' does not - # allow AI_PASSIVE to work. Passing None instead - # returns '0.0.0.0' like we want. In other words: - # host AI_PASSIVE result - # '' Y 192.168.x.y - # '' N 192.168.x.y - # None Y 0.0.0.0 - # None N 127.0.0.1 - # But since you can get the same effect with an explicit - # '0.0.0.0', we deny both the empty string and None as values. - raise ValueError("Host values of '' or None are not allowed. " - "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " - "to listen on all active interfaces.") - self._bind_addr = value - bind_addr = property(_get_bind_addr, _set_bind_addr, - doc="""The interface on which to listen for connections. - - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string.""") - - def start(self): - """Run the server forever.""" - # We don't have to trap KeyboardInterrupt or SystemExit here, - # because cherrpy.server already does so, calling self.stop() for us. - # If you're using this server with another framework, you should - # trap those exceptions in whatever code block calls start(). - self._interrupt = None - - # Select the appropriate socket - if isinstance(self.bind_addr, basestring): - # AF_UNIX socket - - # So we can reuse the socket... - try: os.unlink(self.bind_addr) - except: pass - - # So everyone can access the socket... 
- try: os.chmod(self.bind_addr, 0777) - except: pass - - info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] - else: - # AF_INET or AF_INET6 socket - # Get the correct address family for our host (allows IPv6 addresses) - host, port = self.bind_addr - try: - info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM, 0, socket.AI_PASSIVE) - except socket.gaierror: - # Probably a DNS issue. Assume IPv4. - info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", self.bind_addr)] - - self.socket = None - msg = "No socket could be created" - for res in info: - af, socktype, proto, canonname, sa = res - try: - self.bind(af, socktype, proto) - except socket.error, msg: - if self.socket: - self.socket.close() - self.socket = None - continue - break - if not self.socket: - raise socket.error, msg - - # Timeout so KeyboardInterrupt can be caught on Win32 - self.socket.settimeout(1) - self.socket.listen(self.request_queue_size) - - # Create worker threads - self.requests.start() - - self.ready = True - while self.ready: - self.tick() - if self.interrupt: - while self.interrupt is True: - # Wait for self.stop() to complete. See _set_interrupt. - time.sleep(0.1) - if self.interrupt: - raise self.interrupt - - def bind(self, family, type, proto=0): - """Create (or recreate) the actual socket object.""" - self.socket = socket.socket(family, type, proto) - prevent_socket_inheritance(self.socket) - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - if self.nodelay: - self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - if self.ssl_certificate and self.ssl_private_key: - if SSL is None: - raise ImportError("You must install pyOpenSSL to use HTTPS.") - - # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 - ctx = SSL.Context(SSL.SSLv23_METHOD) - ctx.use_privatekey_file(self.ssl_private_key) - ctx.use_certificate_file(self.ssl_certificate) - self.socket = SSLConnection(ctx, self.socket) - self.populate_ssl_environ() - - # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), - # activate dual-stack. See http://www.cherrypy.org/ticket/871. - if (not isinstance(self.bind_addr, basestring) - and self.bind_addr[0] == '::' and family == socket.AF_INET6): - try: - self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) - except (AttributeError, socket.error): - # Apparently, the socket option is not available in - # this machine's TCP stack - pass - - self.socket.bind(self.bind_addr) - - def tick(self): - """Accept a new connection and put it on the Queue.""" - try: - s, addr = self.socket.accept() - prevent_socket_inheritance(s) - if not self.ready: - return - if hasattr(s, 'settimeout'): - s.settimeout(self.timeout) - - environ = self.environ.copy() - # SERVER_SOFTWARE is common for IIS. It's also helpful for - # us to pass a default value for the "Server" response header. - if environ.get("SERVER_SOFTWARE") is None: - environ["SERVER_SOFTWARE"] = "%s WSGI Server" % self.version - # set a non-standard environ entry so the WSGI app can know what - # the *real* server protocol is (and what features to support). - # See http://www.faqs.org/rfcs/rfc2145.html. - environ["ACTUAL_SERVER_PROTOCOL"] = self.protocol - environ["SERVER_NAME"] = self.server_name - - if isinstance(self.bind_addr, basestring): - # AF_UNIX. This isn't really allowed by WSGI, which doesn't - # address unix domain sockets. But it's better than nothing. 
- environ["SERVER_PORT"] = "" - else: - environ["SERVER_PORT"] = str(self.bind_addr[1]) - # optional values - # Until we do DNS lookups, omit REMOTE_HOST - environ["REMOTE_ADDR"] = addr[0] - environ["REMOTE_PORT"] = str(addr[1]) - - conn = self.ConnectionClass(s, self.wsgi_app, environ) - self.requests.put(conn) - except socket.timeout: - # The only reason for the timeout in start() is so we can - # notice keyboard interrupts on Win32, which don't interrupt - # accept() by default - return - except socket.error, x: - if x.args[0] in socket_error_eintr: - # I *think* this is right. EINTR should occur when a signal - # is received during the accept() call; all docs say retry - # the call, and I *think* I'm reading it right that Python - # will then go ahead and poll for and handle the signal - # elsewhere. See http://www.cherrypy.org/ticket/707. - return - if x.args[0] in socket_errors_nonblocking: - # Just try again. See http://www.cherrypy.org/ticket/479. - return - if x.args[0] in socket_errors_to_ignore: - # Our socket was closed. - # See http://www.cherrypy.org/ticket/686. - return - raise - - def _get_interrupt(self): - return self._interrupt - def _set_interrupt(self, interrupt): - self._interrupt = True - self.stop() - self._interrupt = interrupt - interrupt = property(_get_interrupt, _set_interrupt, - doc="Set this to an Exception instance to " - "interrupt the server.") - - def stop(self): - """Gracefully shutdown a server that is serving forever.""" - self.ready = False - - sock = getattr(self, "socket", None) - if sock: - if not isinstance(self.bind_addr, basestring): - # Touch our own socket to make accept() return immediately. - try: - host, port = sock.getsockname()[:2] - except socket.error, x: - if x.args[0] not in socket_errors_to_ignore: - raise - else: - # Note that we're explicitly NOT using AI_PASSIVE, - # here, because we want an actual IP to touch. - # localhost won't work if we've bound to a public IP, - # but it will if we bound to '0.0.0.0' (INADDR_ANY). - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - s = None - try: - s = socket.socket(af, socktype, proto) - # See http://groups.google.com/group/cherrypy-users/ - # browse_frm/thread/bbfe5eb39c904fe0 - s.settimeout(1.0) - s.connect((host, port)) - s.close() - except socket.error: - if s: - s.close() - if hasattr(sock, "close"): - sock.close() - self.socket = None - - self.requests.stop(self.shutdown_timeout) - - def populate_ssl_environ(self): - """Create WSGI environ entries to be merged into each request.""" - cert = open(self.ssl_certificate, 'rb').read() - cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) - ssl_environ = { - "wsgi.url_scheme": "https", - "HTTPS": "on", - # pyOpenSSL doesn't provide access to any of these AFAICT -## 'SSL_PROTOCOL': 'SSLv2', -## SSL_CIPHER string The cipher specification name -## SSL_VERSION_INTERFACE string The mod_ssl program version -## SSL_VERSION_LIBRARY string The OpenSSL program version - } - - # Server certificate attributes - ssl_environ.update({ - 'SSL_SERVER_M_VERSION': cert.get_version(), - 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), -## 'SSL_SERVER_V_START': Validity of server's certificate (start time), -## 'SSL_SERVER_V_END': Validity of server's certificate (end time), - }) - - for prefix, dn in [("I", cert.get_issuer()), - ("S", cert.get_subject())]: - # X509Name objects don't seem to have a way to get the - # complete DN string. 
Use str() and slice it instead, - # because str(dn) == "" - dnstr = str(dn)[18:-2] - - wsgikey = 'SSL_SERVER_%s_DN' % prefix - ssl_environ[wsgikey] = dnstr - - # The DN should be of the form: /k1=v1/k2=v2, but we must allow - # for any value to contain slashes itself (in a URL). - while dnstr: - pos = dnstr.rfind("=") - dnstr, value = dnstr[:pos], dnstr[pos + 1:] - pos = dnstr.rfind("/") - dnstr, key = dnstr[:pos], dnstr[pos + 1:] - if key and value: - wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) - ssl_environ[wsgikey] = value - - self.environ.update(ssl_environ) - + # Le sigh. Boo for backward-incompatible syntax. + exec('from .wsgiserver3 import *') diff --git a/src/cherrypy/wsgiserver/ssl_builtin.py b/src/cherrypy/wsgiserver/ssl_builtin.py new file mode 100644 index 0000000000..03bf05deed --- /dev/null +++ b/src/cherrypy/wsgiserver/ssl_builtin.py @@ -0,0 +1,91 @@ +"""A library for integrating Python's builtin ``ssl`` library with CherryPy. + +The ssl module must be importable for SSL functionality. + +To use this module, set ``CherryPyWSGIServer.ssl_adapter`` to an instance of +``BuiltinSSLAdapter``. +""" + +try: + import ssl +except ImportError: + ssl = None + +try: + from _pyio import DEFAULT_BUFFER_SIZE +except ImportError: + try: + from io import DEFAULT_BUFFER_SIZE + except ImportError: + DEFAULT_BUFFER_SIZE = -1 + +import sys + +from cherrypy import wsgiserver + + +class BuiltinSSLAdapter(wsgiserver.SSLAdapter): + """A wrapper for integrating Python's builtin ssl module with CherryPy.""" + + certificate = None + """The filename of the server SSL certificate.""" + + private_key = None + """The filename of the server's private key file.""" + + def __init__(self, certificate, private_key, certificate_chain=None): + if ssl is None: + raise ImportError("You must install the ssl module to use HTTPS.") + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def bind(self, sock): + """Wrap and return the given socket.""" + return sock + + def wrap(self, sock): + """Wrap and return the given socket, plus WSGI environ entries.""" + try: + s = ssl.wrap_socket(sock, do_handshake_on_connect=True, + server_side=True, certfile=self.certificate, + keyfile=self.private_key, ssl_version=ssl.PROTOCOL_SSLv23) + except ssl.SSLError: + e = sys.exc_info()[1] + if e.errno == ssl.SSL_ERROR_EOF: + # This is almost certainly due to the cherrypy engine + # 'pinging' the socket to assert it's connectable; + # the 'ping' isn't SSL. + return None, {} + elif e.errno == ssl.SSL_ERROR_SSL: + if e.args[1].endswith('http request'): + # The client is speaking HTTP to an HTTPS server. + raise wsgiserver.NoSSLError + elif e.args[1].endswith('unknown protocol'): + # The client is speaking some non-HTTP protocol. + # Drop the conn. 
+ return None, {} + raise + return s, self.get_environ(s) + + # TODO: fill this out more with mod ssl env + def get_environ(self, sock): + """Create WSGI environ entries to be merged into each request.""" + cipher = sock.cipher() + ssl_environ = { + "wsgi.url_scheme": "https", + "HTTPS": "on", + 'SSL_PROTOCOL': cipher[1], + 'SSL_CIPHER': cipher[0] +## SSL_VERSION_INTERFACE string The mod_ssl program version +## SSL_VERSION_LIBRARY string The OpenSSL program version + } + return ssl_environ + + if sys.version_info >= (3, 0): + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_makefile(sock, mode, bufsize) + else: + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_fileobject(sock, mode, bufsize) + diff --git a/src/cherrypy/wsgiserver/ssl_pyopenssl.py b/src/cherrypy/wsgiserver/ssl_pyopenssl.py new file mode 100644 index 0000000000..f3d9bf54b8 --- /dev/null +++ b/src/cherrypy/wsgiserver/ssl_pyopenssl.py @@ -0,0 +1,256 @@ +"""A library for integrating pyOpenSSL with CherryPy. + +The OpenSSL module must be importable for SSL functionality. +You can obtain it from http://pyopenssl.sourceforge.net/ + +To use this module, set CherryPyWSGIServer.ssl_adapter to an instance of +SSLAdapter. There are two ways to use SSL: + +Method One +---------- + + * ``ssl_adapter.context``: an instance of SSL.Context. + +If this is not None, it is assumed to be an SSL.Context instance, +and will be passed to SSL.Connection on bind(). The developer is +responsible for forming a valid Context object. This approach is +to be preferred for more flexibility, e.g. if the cert and key are +streams instead of files, or need decryption, or SSL.SSLv3_METHOD +is desired instead of the default SSL.SSLv23_METHOD, etc. Consult +the pyOpenSSL documentation for complete options. + +Method Two (shortcut) +--------------------- + + * ``ssl_adapter.certificate``: the filename of the server SSL certificate. + * ``ssl_adapter.private_key``: the filename of the server's private key file. + +Both are None by default. If ssl_adapter.context is None, but .private_key +and .certificate are both given and valid, they will be read, and the +context will be automatically created from them. +""" + +import socket +import threading +import time + +from cherrypy import wsgiserver + +try: + from OpenSSL import SSL + from OpenSSL import crypto +except ImportError: + SSL = None + + +class SSL_fileobject(wsgiserver.CP_fileobject): + """SSL file object attached to a socket object.""" + + ssl_timeout = 3 + ssl_retry = .01 + + def _safe_call(self, is_reader, call, *args, **kwargs): + """Wrap the given call with SSL error-trapping. + + is_reader: if False EOF errors will be raised. If True, EOF errors + will return "" (to emulate normal sockets). + """ + start = time.time() + while True: + try: + return call(*args, **kwargs) + except SSL.WantReadError: + # Sleep and try again. This is dangerous, because it means + # the rest of the stack has no way of differentiating + # between a "new handshake" error and "client dropped". + # Note this isn't an endless loop: there's a timeout below. 
+ time.sleep(self.ssl_retry) + except SSL.WantWriteError: + time.sleep(self.ssl_retry) + except SSL.SysCallError, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + errnum = e.args[0] + if is_reader and errnum in wsgiserver.socket_errors_to_ignore: + return "" + raise socket.error(errnum) + except SSL.Error, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + thirdarg = None + try: + thirdarg = e.args[0][0][2] + except IndexError: + pass + + if thirdarg == 'http request': + # The client is talking HTTP to an HTTPS server. + raise wsgiserver.NoSSLError() + + raise wsgiserver.FatalSSLAlert(*e.args) + except: + raise + + if time.time() - start > self.ssl_timeout: + raise socket.timeout("timed out") + + def recv(self, *args, **kwargs): + buf = [] + r = super(SSL_fileobject, self).recv + while True: + data = self._safe_call(True, r, *args, **kwargs) + buf.append(data) + p = self._sock.pending() + if not p: + return "".join(buf) + + def sendall(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).sendall, + *args, **kwargs) + + def send(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).send, + *args, **kwargs) + + +class SSLConnection: + """A thread-safe wrapper for an SSL.Connection. + + ``*args``: the arguments to create the wrapped ``SSL.Connection(*args)``. + """ + + def __init__(self, *args): + self._ssl_conn = SSL.Connection(*args) + self._lock = threading.RLock() + + for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', + 'renegotiate', 'bind', 'listen', 'connect', 'accept', + 'setblocking', 'fileno', 'close', 'get_cipher_list', + 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', + 'makefile', 'get_app_data', 'set_app_data', 'state_string', + 'sock_shutdown', 'get_peer_certificate', 'want_read', + 'want_write', 'set_connect_state', 'set_accept_state', + 'connect_ex', 'sendall', 'settimeout', 'gettimeout'): + exec("""def %s(self, *args): + self._lock.acquire() + try: + return self._ssl_conn.%s(*args) + finally: + self._lock.release() +""" % (f, f)) + + def shutdown(self, *args): + self._lock.acquire() + try: + # pyOpenSSL.socket.shutdown takes no args + return self._ssl_conn.shutdown() + finally: + self._lock.release() + + +class pyOpenSSLAdapter(wsgiserver.SSLAdapter): + """A wrapper for integrating pyOpenSSL with CherryPy.""" + + context = None + """An instance of SSL.Context.""" + + certificate = None + """The filename of the server SSL certificate.""" + + private_key = None + """The filename of the server's private key file.""" + + certificate_chain = None + """Optional. The filename of CA's intermediate certificate bundle. 
+ + This is needed for cheaper "chained root" SSL certificates, and should be + left as None if not required.""" + + def __init__(self, certificate, private_key, certificate_chain=None): + if SSL is None: + raise ImportError("You must install pyOpenSSL to use HTTPS.") + + self.context = None + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + self._environ = None + + def bind(self, sock): + """Wrap and return the given socket.""" + if self.context is None: + self.context = self.get_context() + conn = SSLConnection(self.context, sock) + self._environ = self.get_environ() + return conn + + def wrap(self, sock): + """Wrap and return the given socket, plus WSGI environ entries.""" + return sock, self._environ.copy() + + def get_context(self): + """Return an SSL.Context from self attributes.""" + # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 + c = SSL.Context(SSL.SSLv23_METHOD) + c.use_privatekey_file(self.private_key) + if self.certificate_chain: + c.load_verify_locations(self.certificate_chain) + c.use_certificate_file(self.certificate) + return c + + def get_environ(self): + """Return WSGI environ entries to be merged into each request.""" + ssl_environ = { + "HTTPS": "on", + # pyOpenSSL doesn't provide access to any of these AFAICT +## 'SSL_PROTOCOL': 'SSLv2', +## SSL_CIPHER string The cipher specification name +## SSL_VERSION_INTERFACE string The mod_ssl program version +## SSL_VERSION_LIBRARY string The OpenSSL program version + } + + if self.certificate: + # Server certificate attributes + cert = open(self.certificate, 'rb').read() + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) + ssl_environ.update({ + 'SSL_SERVER_M_VERSION': cert.get_version(), + 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), +## 'SSL_SERVER_V_START': Validity of server's certificate (start time), +## 'SSL_SERVER_V_END': Validity of server's certificate (end time), + }) + + for prefix, dn in [("I", cert.get_issuer()), + ("S", cert.get_subject())]: + # X509Name objects don't seem to have a way to get the + # complete DN string. Use str() and slice it instead, + # because str(dn) == "" + dnstr = str(dn)[18:-2] + + wsgikey = 'SSL_SERVER_%s_DN' % prefix + ssl_environ[wsgikey] = dnstr + + # The DN should be of the form: /k1=v1/k2=v2, but we must allow + # for any value to contain slashes itself (in a URL). + while dnstr: + pos = dnstr.rfind("=") + dnstr, value = dnstr[:pos], dnstr[pos + 1:] + pos = dnstr.rfind("/") + dnstr, key = dnstr[:pos], dnstr[pos + 1:] + if key and value: + wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) + ssl_environ[wsgikey] = value + + return ssl_environ + + def makefile(self, sock, mode='r', bufsize=-1): + if SSL and isinstance(sock, SSL.ConnectionType): + timeout = sock.gettimeout() + f = SSL_fileobject(sock, mode, bufsize) + f.ssl_timeout = timeout + return f + else: + return wsgiserver.CP_fileobject(sock, mode, bufsize) + diff --git a/src/cherrypy/wsgiserver/wsgiserver2.py b/src/cherrypy/wsgiserver/wsgiserver2.py new file mode 100644 index 0000000000..b6bd499718 --- /dev/null +++ b/src/cherrypy/wsgiserver/wsgiserver2.py @@ -0,0 +1,2322 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. 
+
+Simplest example on how to use this module directly
+(without using CherryPy's application machinery)::
+
+    from cherrypy import wsgiserver
+
+    def my_crazy_app(environ, start_response):
+        status = '200 OK'
+        response_headers = [('Content-type','text/plain')]
+        start_response(status, response_headers)
+        return ['Hello world!']
+
+    server = wsgiserver.CherryPyWSGIServer(
+                ('0.0.0.0', 8070), my_crazy_app,
+                server_name='www.cherrypy.example')
+    server.start()
+
+The CherryPy WSGI server can serve as many WSGI applications
+as you want in one instance by using a WSGIPathInfoDispatcher::
+
+    d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app})
+    server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d)
+
+Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance.
+
+This won't call the CherryPy engine (application side) at all, only the
+HTTP server, which is independent from the rest of CherryPy. Don't
+let the name "CherryPyWSGIServer" throw you; the name merely reflects
+its origin, not its coupling.
+
+For those of you wanting to understand internals of this module, here's the
+basic call flow. The server's listening thread runs a very tight loop,
+sticking incoming connections onto a Queue::
+
+    server = CherryPyWSGIServer(...)
+    server.start()
+    while True:
+        tick()
+        # This blocks until a request comes in:
+        child = socket.accept()
+        conn = HTTPConnection(child, ...)
+        server.requests.put(conn)
+
+Worker threads are kept in a pool and poll the Queue, popping off and then
+handling each connection in turn. Each connection can consist of an arbitrary
+number of requests and their responses, so we run a nested loop::
+
+    while True:
+        conn = server.requests.get()
+        conn.communicate()
+        ->  while True:
+                req = HTTPRequest(...)
+                req.parse_request()
+                ->  # Read the Request-Line, e.g. "GET /page HTTP/1.1"
+                    req.rfile.readline()
+                    read_headers(req.rfile, req.inheaders)
+                req.respond()
+                ->  response = app(...)
+                    try:
+                        for chunk in response:
+                            if chunk:
+                                req.write(chunk)
+                    finally:
+                        if hasattr(response, "close"):
+                            response.close()
+            if req.close_connection:
+                return
+"""
+
+__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer',
+           'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile',
+           'CP_fileobject',
+           'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert',
+           'WorkerThread', 'ThreadPool', 'SSLAdapter',
+           'CherryPyWSGIServer',
+           'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0',
+           'WSGIPathInfoDispatcher', 'get_ssl_adapter_class']
+
+import os
+try:
+    import queue
+except:
+    import Queue as queue
+import re
+import rfc822
+import socket
+import sys
+if 'win' in sys.platform and not hasattr(socket, 'IPPROTO_IPV6'):
+    socket.IPPROTO_IPV6 = 41
+try:
+    import cStringIO as StringIO
+except ImportError:
+    import StringIO
+DEFAULT_BUFFER_SIZE = -1
+
+_fileobject_uses_str_type = isinstance(socket._fileobject(None)._rbuf, basestring)
+
+import threading
+import time
+import traceback
+def format_exc(limit=None):
+    """Like print_exc() but return a string.
Backport for Python 2.3.""" + try: + etype, value, tb = sys.exc_info() + return ''.join(traceback.format_exception(etype, value, tb, limit)) + finally: + etype = value = tb = None + + +from urllib import unquote +from urlparse import urlparse +import warnings + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. + """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", + ) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate']] + + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. 
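+
+    A minimal usage sketch (the stream contents below are hypothetical and
+    shown only for illustration)::
+
+        rfile = StringIO.StringIO(
+            "Host: example.com\r\n"
+            "Accept: text/html\r\n"
+            "Accept: application/xml\r\n"
+            "\r\n")
+        headers = read_headers(rfile)
+        # headers == {'Host': 'example.com',
+        #             'Accept': 'text/html, application/xml'}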
+ """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = ", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + +class SizeCheckWrapper(object): + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < 256 or data[-1:] == "\n": + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + """Wraps a file-like object, returning an empty string 
when exhausted. + + This class is intended to provide a conforming wsgi.input value for + request entities that have been encoded with the 'chunked' transfer + encoding. + """ + + def __init__(self, rfile, maxlen, bufsize=8192): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + self.buffer = EMPTY + self.bufsize = bufsize + self.closed = False + + def _fetch(self): + if self.closed: + return + + line = self.rfile.readline() + self.bytes_read += len(line) + + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) + + line = line.strip().split(SEMICOLON, 1) + + try: + chunk_size = line.pop(0) + chunk_size = int(chunk_size, 16) + except ValueError: + raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) + + if chunk_size <= 0: + self.closed = True + return + +## if line: chunk_extension = line[0] + + if self.maxlen and self.bytes_read + chunk_size > self.maxlen: + raise IOError("Request Entity Too Large") + + chunk = self.rfile.read(chunk_size) + self.bytes_read += len(chunk) + self.buffer += chunk + + crlf = self.rfile.read(2) + if crlf != CRLF: + raise ValueError( + "Bad chunked transfer coding (expected '\\r\\n', " + "got " + repr(crlf) + ")") + + def read(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + if size: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + data += self.buffer + + def readline(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + newline_pos = self.buffer.find(LF) + if size: + if newline_pos == -1: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + remaining = min(size - len(data), newline_pos) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + if newline_pos == -1: + data += self.buffer + else: + data += self.buffer[:newline_pos] + self.buffer = self.buffer[newline_pos:] + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def read_trailer_lines(self): + if not self.closed: + raise ValueError( + "Cannot read trailers until the request body has been read.") + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + self.bytes_read += len(line) + if self.maxlen and self.bytes_read > self.maxlen: + raise IOError("Request Entity Too Large") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + yield line + + def close(self): + self.rfile.close() + + def __iter__(self): + # Shamelessly stolen from StringIO + total = 0 + line = self.readline(sizehint) + while line: + yield line + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + + +class HTTPRequest(object): + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. 
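+
+    For example, a single keep-alive connection may carry two pipelined
+    requests back to back, each handled by a fresh HTTPRequest instance
+    (an illustrative sketch of the bytes on the wire)::
+
+        GET /index.html HTTP/1.1
+        Host: www.example.com
+
+        GET /style.css HTTP/1.1
+        Host: www.example.com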
+ """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server= server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response("414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response("413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. 
+ request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response("400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + rp = int(req_protocol[5]), int(req_protocol[7]) + except (ValueError, IndexError): + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [unquote(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = "%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + sp = int(self.server.protocol[5]), int(self.server.protocol[7]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. 
Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get("Content-Length", 0)) > mrbs: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get("Connection", "") == "close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get("Connection", "") != "Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get("Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == "chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get("Expect", "") == "100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See http://www.cherrypy.org/ticket/951 + msg = self.server.protocol + " 100 Continue\r\n\r\n" + try: + self.conn.wfile.sendall(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + i = uri.find('://') + if i > 0 and QUESTION_MARK not in uri[:i]: + # An absoluteURI. + # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" 
query ]] + scheme, remainder = uri[:i].lower(), uri[i + 3:] + authority, path = remainder.split(FORWARD_SLASH, 1) + path = FORWARD_SLASH + path + return scheme, authority, path + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get("Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.sendall("0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [self.server.protocol + SPACE + + status + CRLF, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append("Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [hex(len(chunk))[2:], CRLF, chunk, CRLF] + self.conn.wfile.sendall(EMPTY.join(buf)) + else: + self.conn.wfile.sendall(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif "content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. + if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != 'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append(("Transfer-Encoding", "chunked")) + else: + # Closing the conn is the only way to determine len. 
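+                    # (without a Content-Length, an HTTP/1.0 client can only
+                    # detect the end of the body when the server closes the
+                    # connection; see RFC 2616 section 4.4.)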
+ self.close_connection = True + + if "connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append(("Connection", "close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append(("Connection", "Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if "date" not in hkeys: + self.outheaders.append(("Date", rfc822.formatdate())) + + if "server" not in hkeys: + self.outheaders.append(("Server", self.server.server_name)) + + buf = [self.server.protocol + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.sendall(EMPTY.join(buf)) + + +class NoSSLError(Exception): + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_fileobject(socket._fileobject): + """Faux file object attached to a socket object.""" + + def __init__(self, *args, **kwargs): + self.bytes_read = 0 + self.bytes_written = 0 + socket._fileobject.__init__(self, *args, **kwargs) + + def sendall(self, data): + """Sendall for non-blocking sockets.""" + while data: + try: + bytes_sent = self.send(data) + data = data[bytes_sent:] + except socket.error, e: + if e.args[0] not in socket_errors_nonblocking: + raise + + def send(self, data): + bytes_sent = self._sock.send(data) + self.bytes_written += bytes_sent + return bytes_sent + + def flush(self): + if self._wbuf: + buffer = "".join(self._wbuf) + self._wbuf = [] + self.sendall(buffer) + + def recv(self, size): + while True: + try: + data = self._sock.recv(size) + self.bytes_read += len(data) + return data + except socket.error, e: + if (e.args[0] not in socket_errors_nonblocking + and e.args[0] not in socket_error_eintr): + raise + + if not _fileobject_uses_str_type: + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. 
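+                # Keep appending recv() data to 'buf' until the peer closes
+                # the connection (recv returns '').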
+ while True: + data = self.recv(rbufsize) + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + data = self.recv(left) + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self.recv + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + data = self.recv(self._rbufsize) + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + data = self.recv(self._rbufsize) + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). 
+ return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + else: + def read(self, size=-1): + if size < 0: + # Read until EOF + buffers = [self._rbuf] + self._rbuf = "" + if self._rbufsize <= 1: + recv_size = self.default_bufsize + else: + recv_size = self._rbufsize + + while True: + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + return "".join(buffers) + else: + # Read until size bytes or EOF seen, whichever comes first + data = self._rbuf + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + left = size - buf_len + recv_size = max(self._rbufsize, left) + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + def readline(self, size=-1): + data = self._rbuf + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + assert data == "" + buffers = [] + while data != "\n": + data = self.recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + return "".join(buffers) + else: + # Read until size bytes or \n or EOF seen, whichever comes first + nl = data.find('\n', 0, size) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + left = size - buf_len + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + +class HTTPConnection(object): + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. + """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_fileobject): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. 
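+                # Parse and answer each request completely before reading the
+                # next one off this connection, so pipelined responses go out
+                # in the same order the requests arrived.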
+ req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if errnum == 'timed out' or errnum == 'The read operation timed out': + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See http://www.cherrypy.org/ticket/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_fileobject(self.socket._sock, "wb", self.wbufsize) + req.simple_response("400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel socket + # when you call socket.close(). We do so manually here because we + # want this server to send a FIN TCP segment immediately. Note this + # must be called *before* calling socket.close(), because the latter + # drops its reference to the kernel socket. + if hasattr(self.socket, '_sock'): + self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + """An object which equals and does math like the integer '0' but evals True.""" + def __add__(self, other): + return other + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + +class WorkerThread(threading.Thread): + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). 
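+
+    ThreadPool.stop() does exactly that; a simplified sketch of the code
+    below::
+
+        for worker in self._threads:
+            self._queue.put(_SHUTDOWNREQUEST)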
+ """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ((self.start_time is None) and trueyzero or self.conn.requests_seen), + 'Bytes Read': lambda s: self.bytes_read + ((self.start_time is None) and trueyzero or self.conn.rfile.bytes_read), + 'Bytes Written': lambda s: self.bytes_written + ((self.start_time is None) and trueyzero or self.conn.wfile.bytes_written), + 'Work Time': lambda s: self.work_time + ((self.start_time is None) and trueyzero or time.time() - self.start_time), + 'Read Throughput': lambda s: s['Bytes Read'](s) / (s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / (s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue() + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + for i in range(amount): + if self.max > 0 and len(self._threads) >= self.max: + break + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + self._threads.append(worker) + worker.start() + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. 
+ # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + if amount > 0: + for i in range(min(amount, len(self._threads) - self.min)): + # Put a number of shutdown requests on the queue equal + # to 'amount'. Once each of those is processed by a worker, + # that worker will terminate and be culled from our list + # in self.put. + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See http://www.cherrypy.org/ticket/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + """Base class for SSL driver library adapters. + + Required methods: + + * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` + * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> socket file object`` + """ + + def __init__(self, certificate, private_key, certificate_chain=None): + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def wrap(self, sock): + raise NotImplemented + + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + raise NotImplemented + + +class HTTPServer(object): + """An HTTP server.""" + + _bind_addr = "127.0.0.1" + _interrupt = None + + gateway = None + """A Gateway instance.""" + + minthreads = None + """The minimum number of worker threads to create (default 10).""" + + maxthreads = None + """The maximum number of worker threads to create (default -1 = no limit).""" + + server_name = None + """The name of the server; defaults to socket.gethostname().""" + + protocol = "HTTP/1.1" + """The version string to write in the Status-Line of all HTTP responses. + + For example, "HTTP/1.1" is the default. 
This also limits the supported + features used in the response.""" + + request_queue_size = 5 + """The 'backlog' arg to socket.listen(); max queued connections (default 5).""" + + shutdown_timeout = 5 + """The total time, in seconds, to wait for worker threads to cleanly exit.""" + + timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + version = "CherryPy/3.2.2" + """A version string for the HTTPServer.""" + + software = None + """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. + + If None, this defaults to ``'%s Server' % self.version``.""" + + ready = False + """An internal flag which marks whether the socket is accepting connections.""" + + max_request_header_size = 0 + """The maximum size, in bytes, for request headers, or 0 for no limit.""" + + max_request_body_size = 0 + """The maximum size, in bytes, for request bodies, or 0 for no limit.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + ConnectionClass = HTTPConnection + """The class to use for handling HTTP connections.""" + + ssl_adapter = None + """An instance of SSLAdapter (or a subclass). + + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum([w['Requests'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Read'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Written'](w) for w + in s['Worker Threads'].values()], 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum([w['Work Time'](w) for w + in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. 
Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property(_get_bind_addr, _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. + + For UNIX sockets, supply the filename as a string.""") + + def start(self): + """Run the server forever.""" + # We don't have to trap KeyboardInterrupt or SystemExit here, + # because cherrpy.server already does so, calling self.stop() for us. + # If you're using this server with another framework, you should + # trap those exceptions in whatever code block calls start(). + self._interrupt = None + + if self.software is None: + self.software = "%s Server" % self.version + + # SSL backward compatibility + if (self.ssl_adapter is None and + getattr(self, 'ssl_certificate', None) and + getattr(self, 'ssl_private_key', None)): + warnings.warn( + "SSL attributes are deprecated in CherryPy 3.2, and will " + "be removed in CherryPy 3.3. Use an ssl_adapter attribute " + "instead.", + DeprecationWarning + ) + try: + from cherrypy.wsgiserver.ssl_pyopenssl import pyOpenSSLAdapter + except ImportError: + pass + else: + self.ssl_adapter = pyOpenSSLAdapter( + self.ssl_certificate, self.ssl_private_key, + getattr(self, 'ssl_certificate_chain', None)) + + # Select the appropriate socket + if isinstance(self.bind_addr, basestring): + # AF_UNIX socket + + # So we can reuse the socket... + try: os.unlink(self.bind_addr) + except: pass + + # So everyone can access the socket... 
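+            # (mode 0777 lets any local user connect to the UNIX socket;
+            # tighten this if that is not what you want.)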
+ try: os.chmod(self.bind_addr, 511) # 0777 + except: pass + + info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, socket.AI_PASSIVE) + except socket.gaierror: + if ':' in self.bind_addr[0]: + info = [(socket.AF_INET6, socket.SOCK_STREAM, + 0, "", self.bind_addr + (0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, + 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error: + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error(msg) + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + self._start_time = time.time() + while self.ready: + try: + self.tick() + except (KeyboardInterrupt, SystemExit): + raise + except: + self.error_log("Error in HTTPServer.tick", level=logging.ERROR, + traceback=True) + + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def error_log(self, msg="", level=20, traceback=False): + # Override this in subclasses as desired + sys.stderr.write(msg + '\n') + sys.stderr.flush() + if traceback: + tblines = format_exc() + sys.stderr.write(tblines) + sys.stderr.flush() + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + prevent_socket_inheritance(self.socket) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay and not isinstance(self.bind_addr, str): + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if self.ssl_adapter is not None: + self.socket = self.ssl_adapter.bind(self.socket) + + # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), + # activate dual-stack. See http://www.cherrypy.org/ticket/871. 
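+        # Clearing IPV6_V6ONLY lets this single IPv6 socket also accept IPv4
+        # connections as v4-mapped addresses, where the platform supports it.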
+ if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_fileobject + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + self.requests.put(conn) + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See http://www.cherrypy.org/ticket/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See http://www.cherrypy.org/ticket/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See http://www.cherrypy.org/ticket/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. 
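+                # A throw-away connection from this process unblocks the
+                # accept() call in tick(), so the serving loop can notice
+                # that self.ready is now False and exit.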
+ try: + host, port = sock.getsockname()[:2] + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + # Changed to use error code and not message + # See http://www.cherrypy.org/ticket/860. + raise + else: + # Note that we're explicitly NOT using AI_PASSIVE, + # here, because we want an actual IP to touch. + # localhost won't work if we've bound to a public IP, + # but it will if we bound to '0.0.0.0' (INADDR_ANY). + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(1.0) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + if hasattr(sock, "close"): + sock.close() + self.socket = None + + self.requests.stop(self.shutdown_timeout) + + +class Gateway(object): + """A base class to interface HTTPServer with other systems, such as WSGI.""" + + def __init__(self, req): + self.req = req + + def respond(self): + """Process the current request. Must be overridden in a subclass.""" + raise NotImplemented + + +# These may either be wsgiserver.SSLAdapter subclasses or the string names +# of such classes (in which case they will be lazily loaded). +ssl_adapters = { + 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', + 'pyopenssl': 'cherrypy.wsgiserver.ssl_pyopenssl.pyOpenSSLAdapter', + } + +def get_ssl_adapter_class(name='pyopenssl'): + """Return an SSL adapter class for the given name.""" + adapter = ssl_adapters[name.lower()] + if isinstance(adapter, basestring): + last_dot = adapter.rfind(".") + attr_name = adapter[last_dot + 1:] + mod_path = adapter[:last_dot] + + try: + mod = sys.modules[mod_path] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(mod_path, globals(), locals(), ['']) + + # Let an AttributeError propagate outward. 
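+        # (re-raised below with a message naming the module and attribute.)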
+ try: + adapter = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + return adapter + +# -------------------------------- WSGI Stuff -------------------------------- # + + +class CherryPyWSGIServer(HTTPServer): + """A subclass of HTTPServer which calls a WSGI application.""" + + wsgi_version = (1, 0) + """The version of WSGI to produce.""" + + def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, + max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): + self.requests = ThreadPool(self, min=numthreads or 1, max=max) + self.wsgi_app = wsgi_app + self.gateway = wsgi_gateways[self.wsgi_version] + + self.bind_addr = bind_addr + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.request_queue_size = request_queue_size + + self.timeout = timeout + self.shutdown_timeout = shutdown_timeout + self.clear_stats() + + def _get_numthreads(self): + return self.requests.min + def _set_numthreads(self, value): + self.requests.min = value + numthreads = property(_get_numthreads, _set_numthreads) + + +class WSGIGateway(Gateway): + """A base class to interface HTTPServer with WSGI.""" + + def __init__(self, req): + self.req = req + self.started_response = False + self.env = self.get_environ() + self.remaining_bytes_out = None + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + raise NotImplemented + + def respond(self): + """Process the current request.""" + response = self.req.server.wsgi_app(self.env, self.start_response) + try: + for chunk in response: + # "The start_response callable must not actually transmit + # the response headers. Instead, it must store them for the + # server or gateway to transmit only after the first + # iteration of the application return value that yields + # a NON-EMPTY string, or upon the application's first + # invocation of the write() callable." (PEP 333) + if chunk: + if isinstance(chunk, unicodestr): + chunk = chunk.encode('ISO-8859-1') + self.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + + def start_response(self, status, headers, exc_info = None): + """WSGI callable to begin the HTTP response.""" + # "The application may call start_response more than once, + # if and only if the exc_info argument is provided." + if self.started_response and not exc_info: + raise AssertionError("WSGI start_response called a second " + "time with no exc_info.") + self.started_response = True + + # "if exc_info is provided, and the HTTP headers have already been + # sent, start_response must raise an error, and should raise the + # exc_info tuple." + if self.req.sent_headers: + try: + raise exc_info[0], exc_info[1], exc_info[2] + finally: + exc_info = None + + self.req.status = status + for k, v in headers: + if not isinstance(k, str): + raise TypeError("WSGI response header key %r is not of type str." % k) + if not isinstance(v, str): + raise TypeError("WSGI response header value %r is not of type str." % v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.extend(headers) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). 
+ """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response("500 Internal Server Error", + "The requested resource returned more bytes than the " + "declared Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path, + 'QUERY_STRING': req.qs, + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method, + 'REQUEST_URI': req.uri, + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol, + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme, + 'wsgi.version': (1, 0), + } + + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.iteritems(): + env["HTTP_" + k.upper().replace("-", "_")] = v + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys and values + in both Python 2 and Python 3. + """ + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env_10 = WSGIGateway_10.get_environ(self) + env = dict([(k.decode('ISO-8859-1'), v) for k, v in env_10.iteritems()]) + env[u'wsgi.version'] = ('u', 0) + + # Request-URI + env.setdefault(u'wsgi.url_encoding', u'utf-8') + try: + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + except UnicodeDecodeError: + # Fall back to latin 1 so apps can transcode if needed. 
+ env[u'wsgi.url_encoding'] = u'ISO-8859-1' + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + + for k, v in sorted(env.items()): + if isinstance(v, str) and k not in ('REQUEST_URI', 'wsgi.input'): + env[k] = v.decode('ISO-8859-1') + + return env + +wsgi_gateways = { + (1, 0): WSGIGateway_10, + ('u', 0): WSGIGateway_u0, +} + +class WSGIPathInfoDispatcher(object): + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = list(apps.items()) + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort(cmp=lambda x,y: cmp(len(x[0]), len(y[0]))) + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". + self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] + diff --git a/src/cherrypy/wsgiserver/wsgiserver3.py b/src/cherrypy/wsgiserver/wsgiserver3.py new file mode 100644 index 0000000000..62db5ffd3b --- /dev/null +++ b/src/cherrypy/wsgiserver/wsgiserver3.py @@ -0,0 +1,2040 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. + +Simplest example on how to use this module directly +(without using CherryPy's application machinery):: + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + server.start() + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher:: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. + +This won't call the CherryPy engine (application side) at all, only the +HTTP server, which is independent from the rest of CherryPy. Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue:: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop:: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. 
"GET /page HTTP/1.1" + req.rfile.readline() + read_headers(req.rfile, req.inheaders) + req.respond() + -> response = app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'CP_makefile', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] + +import os +try: + import queue +except: + import Queue as queue +import re +import email.utils +import socket +import sys +if 'win' in sys.platform and not hasattr(socket, 'IPPROTO_IPV6'): + socket.IPPROTO_IPV6 = 41 +if sys.version_info < (3,1): + import io +else: + import _pyio as io +DEFAULT_BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE + +import threading +import time +from traceback import format_exc +from urllib.parse import unquote +from urllib.parse import urlparse +from urllib.parse import scheme_chars +import warnings + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. 
+ """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", + ) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate']] + + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. + """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = b", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + +class SizeCheckWrapper(object): + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! 
+ res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < 256 or data[-1:] == "\n": + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + """Wraps a file-like object, returning an empty string when exhausted. + + This class is intended to provide a conforming wsgi.input value for + request entities that have been encoded with the 'chunked' transfer + encoding. 
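+ 
+ A minimal example of the wire format being decoded (chunk sizes are
+ hexadecimal)::
+ 
+ 4\r\n
+ Wiki\r\n
+ 0\r\n
+ \r\n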
+ """ + + def __init__(self, rfile, maxlen, bufsize=8192): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + self.buffer = EMPTY + self.bufsize = bufsize + self.closed = False + + def _fetch(self): + if self.closed: + return + + line = self.rfile.readline() + self.bytes_read += len(line) + + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) + + line = line.strip().split(SEMICOLON, 1) + + try: + chunk_size = line.pop(0) + chunk_size = int(chunk_size, 16) + except ValueError: + raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) + + if chunk_size <= 0: + self.closed = True + return + +## if line: chunk_extension = line[0] + + if self.maxlen and self.bytes_read + chunk_size > self.maxlen: + raise IOError("Request Entity Too Large") + + chunk = self.rfile.read(chunk_size) + self.bytes_read += len(chunk) + self.buffer += chunk + + crlf = self.rfile.read(2) + if crlf != CRLF: + raise ValueError( + "Bad chunked transfer coding (expected '\\r\\n', " + "got " + repr(crlf) + ")") + + def read(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + if size: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + data += self.buffer + + def readline(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + newline_pos = self.buffer.find(LF) + if size: + if newline_pos == -1: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + remaining = min(size - len(data), newline_pos) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + if newline_pos == -1: + data += self.buffer + else: + data += self.buffer[:newline_pos] + self.buffer = self.buffer[newline_pos:] + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def read_trailer_lines(self): + if not self.closed: + raise ValueError( + "Cannot read trailers until the request body has been read.") + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + self.bytes_read += len(line) + if self.maxlen and self.bytes_read > self.maxlen: + raise IOError("Request Entity Too Large") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + yield line + + def close(self): + self.rfile.close() + + def __iter__(self): + # Shamelessly stolen from StringIO + total = 0 + line = self.readline(sizehint) + while line: + yield line + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + + +class HTTPRequest(object): + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. 
+ """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server= server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response("414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response("413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. 
+ request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response("400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + # The [x:y] slicing is necessary for byte strings to avoid getting ord's + rp = int(req_protocol[5:6]), int(req_protocol[7:8]) + except ValueError: + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [self.unquote_bytes(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = b"%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + # The [x:y] slicing is necessary for byte strings to avoid getting ord's + sp = int(self.server.protocol[5:6]), int(self.server.protocol[7:8]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. 
Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get(b"Content-Length", 0)) > mrbs: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get(b"Connection", b"") == b"close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get(b"Connection", b"") != b"Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get(b"Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(b",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == b"chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get(b"Expect", b"") == b"100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See http://www.cherrypy.org/ticket/951 + msg = self.server.protocol.encode('ascii') + b" 100 Continue\r\n\r\n" + try: + self.conn.wfile.write(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + scheme, sep, remainder = uri.partition(b'://') + if sep and QUESTION_MARK not in scheme: + # An absoluteURI. 
+ # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] + authority, path_a, path_b = remainder.partition(FORWARD_SLASH) + return scheme.lower(), authority, path_a+path_b + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def unquote_bytes(self, path): + """takes quoted string and unquotes % encoded values""" + res = path.split(b'%') + + for i in range(1, len(res)): + item = res[i] + try: + res[i] = bytes([int(item[:2], 16)]) + item[2:] + except ValueError: + raise + return b''.join(res) + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get(b"Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.write(b"0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [bytes(self.server.protocol, "ascii") + SPACE + + bytes(status, "ISO-8859-1") + CRLF, + bytes("Content-Length: %s\r\n" % len(msg), "ISO-8859-1"), + b"Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append(b"Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.write(b"".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [bytes(hex(len(chunk)), 'ASCII')[2:], CRLF, chunk, CRLF] + self.conn.wfile.write(EMPTY.join(buf)) + else: + self.conn.wfile.write(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif b"content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. 
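+ # Otherwise, when no Content-Length was supplied: an HTTP/1.1 response
+ # to a non-HEAD request can fall back to chunked transfer-coding, while
+ # anything older has to delimit the body by closing the connection.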
+ if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != b'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append((b"Transfer-Encoding", b"chunked")) + else: + # Closing the conn is the only way to determine len. + self.close_connection = True + + if b"connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append((b"Connection", b"close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append((b"Connection", b"Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if b"date" not in hkeys: + self.outheaders.append( + (b"Date", email.utils.formatdate(usegmt=True).encode('ISO-8859-1'))) + + if b"server" not in hkeys: + self.outheaders.append( + (b"Server", self.server.server_name.encode('ISO-8859-1'))) + + buf = [self.server.protocol.encode('ascii') + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.write(EMPTY.join(buf)) + + +class NoSSLError(Exception): + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_BufferedWriter(io.BufferedWriter): + """Faux file object attached to a socket object.""" + + def write(self, b): + self._checkClosed() + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + + with self._write_lock: + self._write_buf.extend(b) + self._flush_unlocked() + return len(b) + + def _flush_unlocked(self): + self._checkClosed("flush of closed file") + while self._write_buf: + try: + # ssl sockets only except 'bytes', not bytearrays + # so perhaps we should conditionally wrap this for perf? + n = self.raw.write(bytes(self._write_buf)) + except io.BlockingIOError as e: + n = e.characters_written + del self._write_buf[:n] + + +def CP_makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + if 'r' in mode: + return io.BufferedReader(socket.SocketIO(sock, mode), bufsize) + else: + return CP_BufferedWriter(socket.SocketIO(sock, mode), bufsize) + +class HTTPConnection(object): + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. 
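+ 
+ The connection's communicate() method loops over the socket, building
+ one HTTPRequest per request until either side asks for the connection
+ to be closed.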
+ """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_makefile): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. + req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if errnum == 'timed out' or errnum == 'The read operation timed out': + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See http://www.cherrypy.org/ticket/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_makefile(self.socket._sock, "wb", self.wbufsize) + req.simple_response("400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel socket + # when you call socket.close(). We do so manually here because we + # want this server to send a FIN TCP segment immediately. Note this + # must be called *before* calling socket.close(), because the latter + # drops its reference to the kernel socket. + # Python 3 *probably* fixed this with socket._real_close; hard to tell. +## self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. 
Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + """An object which equals and does math like the integer '0' but evals True.""" + def __add__(self, other): + return other + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + +class WorkerThread(threading.Thread): + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). + """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ((self.start_time is None) and trueyzero or self.conn.requests_seen), + 'Bytes Read': lambda s: self.bytes_read + ((self.start_time is None) and trueyzero or self.conn.rfile.bytes_read), + 'Bytes Written': lambda s: self.bytes_written + ((self.start_time is None) and trueyzero or self.conn.wfile.bytes_written), + 'Work Time': lambda s: self.work_time + ((self.start_time is None) and trueyzero or time.time() - self.start_time), + 'Read Throughput': lambda s: s['Bytes Read'](s) / (s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / (s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue() + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. 
Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + for i in range(amount): + if self.max > 0 and len(self._threads) >= self.max: + break + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + self._threads.append(worker) + worker.start() + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. + # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + if amount > 0: + for i in range(min(amount, len(self._threads) - self.min)): + # Put a number of shutdown requests on the queue equal + # to 'amount'. Once each of those is processed by a worker, + # that worker will terminate and be culled from our list + # in self.put. + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See http://www.cherrypy.org/ticket/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + """Base class for SSL driver library adapters. 
+ + Required methods: + + * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` + * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> socket file object`` + """ + + def __init__(self, certificate, private_key, certificate_chain=None): + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def wrap(self, sock): + raise NotImplemented + + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + raise NotImplemented + + +class HTTPServer(object): + """An HTTP server.""" + + _bind_addr = "127.0.0.1" + _interrupt = None + + gateway = None + """A Gateway instance.""" + + minthreads = None + """The minimum number of worker threads to create (default 10).""" + + maxthreads = None + """The maximum number of worker threads to create (default -1 = no limit).""" + + server_name = None + """The name of the server; defaults to socket.gethostname().""" + + protocol = "HTTP/1.1" + """The version string to write in the Status-Line of all HTTP responses. + + For example, "HTTP/1.1" is the default. This also limits the supported + features used in the response.""" + + request_queue_size = 5 + """The 'backlog' arg to socket.listen(); max queued connections (default 5).""" + + shutdown_timeout = 5 + """The total time, in seconds, to wait for worker threads to cleanly exit.""" + + timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + version = "CherryPy/3.2.2" + """A version string for the HTTPServer.""" + + software = None + """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. + + If None, this defaults to ``'%s Server' % self.version``.""" + + ready = False + """An internal flag which marks whether the socket is accepting connections.""" + + max_request_header_size = 0 + """The maximum size, in bytes, for request headers, or 0 for no limit.""" + + max_request_body_size = 0 + """The maximum size, in bytes, for request bodies, or 0 for no limit.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + ConnectionClass = HTTPConnection + """The class to use for handling HTTP connections.""" + + ssl_adapter = None + """An instance of SSLAdapter (or a subclass). 
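+ 
+ For example (the file names here are purely illustrative), something
+ like ``server.ssl_adapter = get_ssl_adapter_class('builtin')('cert.pem', 'key.pem')``
+ switches the server to HTTPS.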
+ + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum([w['Requests'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Read'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Written'](w) for w + in s['Worker Threads'].values()], 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum([w['Work Time'](w) for w + in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property(_get_bind_addr, _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. 
+ + For UNIX sockets, supply the filename as a string.""") + + def start(self): + """Run the server forever.""" + # We don't have to trap KeyboardInterrupt or SystemExit here, + # because cherrpy.server already does so, calling self.stop() for us. + # If you're using this server with another framework, you should + # trap those exceptions in whatever code block calls start(). + self._interrupt = None + + if self.software is None: + self.software = "%s Server" % self.version + + # Select the appropriate socket + if isinstance(self.bind_addr, basestring): + # AF_UNIX socket + + # So we can reuse the socket... + try: os.unlink(self.bind_addr) + except: pass + + # So everyone can access the socket... + try: os.chmod(self.bind_addr, 511) # 0777 + except: pass + + info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, socket.AI_PASSIVE) + except socket.gaierror: + if ':' in self.bind_addr[0]: + info = [(socket.AF_INET6, socket.SOCK_STREAM, + 0, "", self.bind_addr + (0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, + 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error: + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error(msg) + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + self._start_time = time.time() + while self.ready: + try: + self.tick() + except (KeyboardInterrupt, SystemExit): + raise + except: + self.error_log("Error in HTTPServer.tick", level=logging.ERROR, + traceback=True) + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def error_log(self, msg="", level=20, traceback=False): + # Override this in subclasses as desired + sys.stderr.write(msg + '\n') + sys.stderr.flush() + if traceback: + tblines = format_exc() + sys.stderr.write(tblines) + sys.stderr.flush() + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + prevent_socket_inheritance(self.socket) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay and not isinstance(self.bind_addr, str): + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if self.ssl_adapter is not None: + self.socket = self.ssl_adapter.bind(self.socket) + + # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), + # activate dual-stack. See http://www.cherrypy.org/ticket/871. 
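+ # Clearing IPV6_V6ONLY lets the single '::' socket accept IPv4
+ # connections as well (as IPv4-mapped addresses). Not every stack
+ # exposes the option, hence the try/except around setsockopt below.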
+ if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_makefile + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.write("".join(buf).encode('ISO-8859-1')) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + self.requests.put(conn) + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See http://www.cherrypy.org/ticket/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See http://www.cherrypy.org/ticket/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See http://www.cherrypy.org/ticket/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. 
+                try:
+                    host, port = sock.getsockname()[:2]
+                except socket.error:
+                    x = sys.exc_info()[1]
+                    if x.args[0] not in socket_errors_to_ignore:
+                        # Changed to use error code and not message
+                        # See http://www.cherrypy.org/ticket/860.
+                        raise
+                else:
+                    # Note that we're explicitly NOT using AI_PASSIVE,
+                    # here, because we want an actual IP to touch.
+                    # localhost won't work if we've bound to a public IP,
+                    # but it will if we bound to '0.0.0.0' (INADDR_ANY).
+                    for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC,
+                                                  socket.SOCK_STREAM):
+                        af, socktype, proto, canonname, sa = res
+                        s = None
+                        try:
+                            s = socket.socket(af, socktype, proto)
+                            # See http://groups.google.com/group/cherrypy-users/
+                            # browse_frm/thread/bbfe5eb39c904fe0
+                            s.settimeout(1.0)
+                            s.connect((host, port))
+                            s.close()
+                        except socket.error:
+                            if s:
+                                s.close()
+            if hasattr(sock, "close"):
+                sock.close()
+            self.socket = None
+
+        self.requests.stop(self.shutdown_timeout)
+
+
+class Gateway(object):
+    """A base class to interface HTTPServer with other systems, such as WSGI."""
+
+    def __init__(self, req):
+        self.req = req
+
+    def respond(self):
+        """Process the current request. Must be overridden in a subclass."""
+        raise NotImplementedError
+
+
+# These may either be wsgiserver.SSLAdapter subclasses or the string names
+# of such classes (in which case they will be lazily loaded).
+ssl_adapters = {
+    'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter',
+    }
+
+def get_ssl_adapter_class(name='builtin'):
+    """Return an SSL adapter class for the given name."""
+    adapter = ssl_adapters[name.lower()]
+    if isinstance(adapter, basestring):
+        last_dot = adapter.rfind(".")
+        attr_name = adapter[last_dot + 1:]
+        mod_path = adapter[:last_dot]
+
+        try:
+            mod = sys.modules[mod_path]
+            if mod is None:
+                raise KeyError()
+        except KeyError:
+            # The last [''] is important.
+            mod = __import__(mod_path, globals(), locals(), [''])
+
+        # Let an AttributeError propagate outward.
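+        # The dotted string is split into a module path and an attribute
+        # name; __import__ with a non-empty fromlist (the [''] above) returns
+        # the leaf module itself rather than the top-level package, so the
+        # getattr below can pull the adapter class off it. A minimal usage
+        # sketch (illustrative only; the constructor arguments are assumed,
+        # not taken from this file):
+        #     adapter_cls = get_ssl_adapter_class('builtin')
+        #     server.ssl_adapter = adapter_cls(cert_path, key_path)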
+        try:
+            adapter = getattr(mod, attr_name)
+        except AttributeError:
+            raise AttributeError("'%s' object has no attribute '%s'"
+                                 % (mod_path, attr_name))
+
+    return adapter
+
+# -------------------------------- WSGI Stuff -------------------------------- #
+
+
+class CherryPyWSGIServer(HTTPServer):
+    """A subclass of HTTPServer which calls a WSGI application."""
+
+    wsgi_version = (1, 0)
+    """The version of WSGI to produce."""
+
+    def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None,
+                 max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5):
+        self.requests = ThreadPool(self, min=numthreads or 1, max=max)
+        self.wsgi_app = wsgi_app
+        self.gateway = wsgi_gateways[self.wsgi_version]
+
+        self.bind_addr = bind_addr
+        if not server_name:
+            server_name = socket.gethostname()
+        self.server_name = server_name
+        self.request_queue_size = request_queue_size
+
+        self.timeout = timeout
+        self.shutdown_timeout = shutdown_timeout
+        self.clear_stats()
+
+    def _get_numthreads(self):
+        return self.requests.min
+    def _set_numthreads(self, value):
+        self.requests.min = value
+    numthreads = property(_get_numthreads, _set_numthreads)
+
+
+class WSGIGateway(Gateway):
+    """A base class to interface HTTPServer with WSGI."""
+
+    def __init__(self, req):
+        self.req = req
+        self.started_response = False
+        self.env = self.get_environ()
+        self.remaining_bytes_out = None
+
+    def get_environ(self):
+        """Return a new environ dict targeting the given wsgi.version"""
+        raise NotImplementedError
+
+    def respond(self):
+        """Process the current request."""
+        response = self.req.server.wsgi_app(self.env, self.start_response)
+        try:
+            for chunk in response:
+                # "The start_response callable must not actually transmit
+                # the response headers. Instead, it must store them for the
+                # server or gateway to transmit only after the first
+                # iteration of the application return value that yields
+                # a NON-EMPTY string, or upon the application's first
+                # invocation of the write() callable." (PEP 333)
+                if chunk:
+                    if isinstance(chunk, unicodestr):
+                        chunk = chunk.encode('ISO-8859-1')
+                    self.write(chunk)
+        finally:
+            if hasattr(response, "close"):
+                response.close()
+
+    def start_response(self, status, headers, exc_info = None):
+        """WSGI callable to begin the HTTP response."""
+        # "The application may call start_response more than once,
+        # if and only if the exc_info argument is provided."
+        if self.started_response and not exc_info:
+            raise AssertionError("WSGI start_response called a second "
+                                 "time with no exc_info.")
+        self.started_response = True
+
+        # "if exc_info is provided, and the HTTP headers have already been
+        # sent, start_response must raise an error, and should raise the
+        # exc_info tuple."
+        if self.req.sent_headers:
+            try:
+                raise exc_info[0](exc_info[1]).with_traceback(exc_info[2])
+            finally:
+                exc_info = None
+
+        # According to PEP 3333, when using Python 3, the response status
+        # and headers must be bytes masquerading as unicode; that is, they
+        # must be of type "str" but are restricted to code points in the
+        # "latin-1" set.
+        if not isinstance(status, str):
+            raise TypeError("WSGI response status is not of type str.")
+        self.req.status = status.encode('ISO-8859-1')
+
+        for k, v in headers:
+            if not isinstance(k, str):
+                raise TypeError("WSGI response header key %r is not of type str." % k)
+            if not isinstance(v, str):
+                raise TypeError("WSGI response header value %r is not of type str."
% v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.append((k.encode('ISO-8859-1'), v.encode('ISO-8859-1'))) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). + """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response("500 Internal Server Error", + "The requested resource returned more bytes than the " + "declared Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path.decode('ISO-8859-1'), + 'QUERY_STRING': req.qs.decode('ISO-8859-1'), + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method.decode('ISO-8859-1'), + 'REQUEST_URI': req.uri, + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol.decode('ISO-8859-1'), + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme.decode('ISO-8859-1'), + 'wsgi.version': (1, 0), + } + + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.items(): + k = k.decode('ISO-8859-1').upper().replace("-", "_") + env["HTTP_" + k] = v.decode('ISO-8859-1') + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys and values + in both Python 2 and Python 3. 
+ """ + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env_10 = WSGIGateway_10.get_environ(self) + env = env_10.copy() + env['wsgi.version'] = ('u', 0) + + # Request-URI + env.setdefault('wsgi.url_encoding', 'utf-8') + try: + # SCRIPT_NAME is the empty string, who cares what encoding it is? + env["PATH_INFO"] = req.path.decode(env['wsgi.url_encoding']) + env["QUERY_STRING"] = req.qs.decode(env['wsgi.url_encoding']) + except UnicodeDecodeError: + # Fall back to latin 1 so apps can transcode if needed. + env['wsgi.url_encoding'] = 'ISO-8859-1' + env["PATH_INFO"] = env_10["PATH_INFO"] + env["QUERY_STRING"] = env_10["QUERY_STRING"] + + return env + +wsgi_gateways = { + (1, 0): WSGIGateway_10, + ('u', 0): WSGIGateway_u0, +} + +class WSGIPathInfoDispatcher(object): + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = list(apps.items()) + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort() + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". + self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] + diff --git a/src/odf/namespaces.py b/src/odf/namespaces.py index 96ea958e79..cff8a74992 100644 --- a/src/odf/namespaces.py +++ b/src/odf/namespaces.py @@ -20,10 +20,12 @@ TOOLSVERSION = u"ODFPY/0.9.4dev" ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0" -DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0" CHARTNS = u"urn:oasis:names:tc:opendocument:xmlns:chart:1.0" +CHARTOOONS = u"http://openoffice.org/2010/chart" CONFIGNS = u"urn:oasis:names:tc:opendocument:xmlns:config:1.0" +CSS3TNS = u"http://www.w3.org/TR/css3-text/" #DBNS = u"http://openoffice.org/2004/database" +DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0" DCNS = u"http://purl.org/dc/elements/1.1/" DOMNS = u"http://www.w3.org/2001/xml-events" DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" @@ -31,6 +33,7 @@ DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" FIELDNS = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" FONS = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0" +FORMXNS = u"urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" GRDDLNS = u"http://www.w3.org/2003/g/data-view#" KOFFICENS = u"http://www.koffice.org/2005/" MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" @@ -39,9 +42,9 @@ METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0" NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0" OFNS = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2" +OOOCNS = u"http://openoffice.org/2004/calc" OOONS = u"http://openoffice.org/2004/office" OOOWNS = u"http://openoffice.org/2004/writer" -OOOCNS = u"http://openoffice.org/2004/calc" PRESENTATIONNS = 
u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#" RPTNS = u"http://openoffice.org/2005/report" @@ -50,8 +53,10 @@ SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0" STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0" SVGNS = u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" TABLENS = u"urn:oasis:names:tc:opendocument:xmlns:table:1.0" +TABLEOOONS = u"http://openoffice.org/2009/table" TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0" XFORMSNS = u"http://www.w3.org/2002/xforms" +XHTMLNS = u"http://www.w3.org/1999/xhtml" XLINKNS = u"http://www.w3.org/1999/xlink" XMLNS = u"http://www.w3.org/XML/1998/namespace" XSDNS = u"http://www.w3.org/2001/XMLSchema" @@ -60,7 +65,9 @@ XSINS = u"http://www.w3.org/2001/XMLSchema-instance" nsdict = { ANIMNS: u'anim', CHARTNS: u'chart', + CHARTOOONS: u'chartooo', CONFIGNS: u'config', + CSS3TNS: u'css3t', DBNS: u'db', DCNS: u'dc', DOMNS: u'dom', @@ -69,6 +76,7 @@ nsdict = { FIELDNS: u'field', FONS: u'fo', FORMNS: u'form', + FORMXNS: u'formx', GRDDLNS: u'grddl', KOFFICENS: u'koffice', MANIFESTNS: u'manifest', @@ -88,9 +96,11 @@ nsdict = { STYLENS: u'style', SVGNS: u'svg', TABLENS: u'table', + TABLEOOONS: u'tableooo', TEXTNS: u'text', XFORMSNS: u'xforms', XLINKNS: u'xlink', + XHTMLNS: u'xhtml', XMLNS: u'xml', XSDNS: u'xsd', XSINS: u'xsi', diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index b1dbebb775..df7ce0bc8d 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -1158,7 +1158,7 @@ ol, ul { padding-left: 2em; } """ Anchors start """ self.writedata() href = attrs[(XLINKNS,"href")].split("|")[0] - if href[0] == "#": + if href[:1] == "#": # Changed by Kovid href = "#" + self.get_anchor(href[1:]) self.opentag('a', {'href':href}) self.purgedata() diff --git a/src/odf/opendocument.py b/src/odf/opendocument.py index 63196382d5..69ada218aa 100644 --- a/src/odf/opendocument.py +++ b/src/odf/opendocument.py @@ -232,7 +232,9 @@ class OpenDocument: """ for e in top.childNodes: if e.nodeType == element.Node.ELEMENT_NODE: - for styleref in ( (DRAWNS,u'style-name'), + for styleref in ( + (CHARTNS,u'style-name'), + (DRAWNS,u'style-name'), (DRAWNS,u'text-style-name'), (PRESENTATIONNS,u'style-name'), (STYLENS,u'data-style-name'),