From 331cc5da66d9075d55273d3b000b97e2bec296ac Mon Sep 17 00:00:00 2001 From: Translators <> Date: Thu, 22 Dec 2011 05:26:48 +0000 Subject: [PATCH 01/74] Launchpad automatic translations update. --- src/calibre/translations/de.po | 33 +++++---- src/calibre/translations/fr.po | 49 +++++++++++- src/calibre/translations/it.po | 29 ++++++-- src/calibre/translations/uk.po | 132 ++++++++++++++++++++++++++++++++- 4 files changed, 216 insertions(+), 27 deletions(-) diff --git a/src/calibre/translations/de.po b/src/calibre/translations/de.po index 187a9b996d..96a2cec215 100644 --- a/src/calibre/translations/de.po +++ b/src/calibre/translations/de.po @@ -8,15 +8,15 @@ msgstr "" "Project-Id-Version: de\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-12-16 09:06+0000\n" -"PO-Revision-Date: 2011-12-20 19:33+0000\n" -"Last-Translator: Marian Felkel \n" +"PO-Revision-Date: 2011-12-21 16:08+0000\n" +"Last-Translator: Mirko Russo \n" "Language-Team: American English \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" -"X-Launchpad-Export-Date: 2011-12-21 05:19+0000\n" -"X-Generator: Launchpad (build 14538)\n" +"X-Launchpad-Export-Date: 2011-12-22 05:26+0000\n" +"X-Generator: Launchpad (build 14560)\n" "X-Poedit-Bookmarks: 3327,-1,-1,-1,-1,-1,-1,-1,-1,-1\n" "Generated-By: pygettext.py 1.5\n" @@ -20368,11 +20368,11 @@ msgstr "Englisch (Australien)" #: /home/kovid/work/calibre/src/calibre/utils/localization.py:115 msgid "English (Japan)" -msgstr "" +msgstr "Englisch (Japan)" #: /home/kovid/work/calibre/src/calibre/utils/localization.py:116 msgid "English (Germany)" -msgstr "" +msgstr "Englisch (Deutschland)" #: /home/kovid/work/calibre/src/calibre/utils/localization.py:117 msgid "English (Bulgaria)" @@ -20521,22 +20521,22 @@ msgstr "Holländisch (BE)" #. NOTE: Ante Meridian (i.e. like 10:00 AM) #: /home/kovid/work/calibre/src/calibre/utils/localization.py:160 msgid "AM" -msgstr "" +msgstr "vormittags" #. NOTE: Post Meridian (i.e. like 10:00 PM) #: /home/kovid/work/calibre/src/calibre/utils/localization.py:162 msgid "PM" -msgstr "" +msgstr "nachmittags" #. NOTE: Ante Meridian (i.e. like 10:00 am) #: /home/kovid/work/calibre/src/calibre/utils/localization.py:164 msgid "am" -msgstr "" +msgstr "vormittags" #. NOTE: Post Meridian (i.e. 
like 10:00 pm) #: /home/kovid/work/calibre/src/calibre/utils/localization.py:166 msgid "pm" -msgstr "" +msgstr "nachmittags" #: /home/kovid/work/calibre/src/calibre/utils/pyconsole/console.py:56 msgid "Choose theme (needs restart)" @@ -21053,7 +21053,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:76 msgid "Splitting multiple author names" -msgstr "" +msgstr "Mehrfachnamen bei Autoren aufsplitten" #: /home/kovid/work/calibre/resources/default_tweaks.py:77 msgid "" @@ -21067,7 +21067,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:84 msgid "Use author sort in Tag Browser" -msgstr "" +msgstr "Benutze die Autorensortierung im Schlagwort-Browser" #: /home/kovid/work/calibre/resources/default_tweaks.py:85 msgid "" @@ -21108,7 +21108,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:108 msgid "Control partitioning of Tag Browser" -msgstr "" +msgstr "Kontrolliere die Partition im Schlagwort-Browser" #: /home/kovid/work/calibre/resources/default_tweaks.py:109 msgid "" @@ -21219,6 +21219,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:188 msgid "Control formatting of title and series when used in templates" msgstr "" +"Kontrolliere das Formatieren von Titel und Serien bei Benutzung in Vorlagen" #: /home/kovid/work/calibre/resources/default_tweaks.py:189 msgid "" @@ -21386,6 +21387,8 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:334 msgid "Control how tags are applied when copying books to another library" msgstr "" +"Kontrolliere, wie Schlagworte angewandt werden, wenn Bücher in eine andere " +"Bibliothek kopiert werden" #: /home/kovid/work/calibre/resources/default_tweaks.py:335 msgid "" @@ -21526,7 +21529,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:411 msgid "Order of custom column(s) in edit metadata" -msgstr "" +msgstr "Reihenfolge der benutzerdefinierten Spalten bei Metadaten bearbeiten" #: /home/kovid/work/calibre/resources/default_tweaks.py:412 msgid "" @@ -21639,7 +21642,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:458 msgid "Unified toolbar on OS X" -msgstr "" +msgstr "Vereinheitlichte Symbolleiste in OS X" #: /home/kovid/work/calibre/resources/default_tweaks.py:459 msgid "" diff --git a/src/calibre/translations/fr.po b/src/calibre/translations/fr.po index 6498193668..cec518fc25 100644 --- a/src/calibre/translations/fr.po +++ b/src/calibre/translations/fr.po @@ -8,15 +8,15 @@ msgstr "" "Project-Id-Version: calibre 0.4.22\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-12-16 09:06+0000\n" -"PO-Revision-Date: 2011-12-19 12:44+0000\n" +"PO-Revision-Date: 2011-12-21 19:46+0000\n" "Last-Translator: sengian \n" "Language-Team: Français \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n > 1;\n" -"X-Launchpad-Export-Date: 2011-12-20 04:49+0000\n" -"X-Generator: Launchpad (build 14538)\n" +"X-Launchpad-Export-Date: 2011-12-22 05:25+0000\n" +"X-Generator: Launchpad (build 14560)\n" "X-Poedit-Bookmarks: 1177,1104,-1,-1,-1,-1,-1,-1,-1,-1\n" "Generated-By: pygettext.py 1.5\n" @@ -21550,6 +21550,29 @@ msgid "" "Acme\n" "Inc. will be Acme Inc. 
instead of Inc., Acme" msgstr "" +"L'algorithme utilisé pour copier l'auteur dans la clé de tri par auteur\n" +"Les valeurs possibles sont :\n" +"invert : utilise \"fn ln\" -> \"ln, fn\"\n" +"copy : copie l'auteur dans la clé de tri par auteur sans modification\n" +"comma : utilise 'copy' si il y a un ',' dans le nom, autrement utilise " +"'invert'\n" +"nocomma : \"fn ln\" -> \"ln fn\" (sans la virgule)\n" +"Quand cette personnalisation est modifié, les clés de tri par auteur stockés " +"pour\n" +"chaque auteur doivent être recalculés en effectuant un click droit sur " +"l'auteur\n" +"dans le panneau étiquette à gauche, en sélectionnant 'Gérer les auteurs', et " +"en\n" +"pressant 'Recalculer toutes les valeurs des clés par auteur'.\n" +"Les suffixes au nom de l'auteur sont des mots qui sont ignorés à la fin\n" +"du nom d'un auteur. La casse du suffixe est ignoré et les points de " +"suspension\n" +"sont gérés automatiquement. Le même traitement s'applique aux préfixes.\n" +"Les mots copiés du nom de l'auteur sont un ensemble de mots qui si ils sont\n" +"présents dans un nom d'auteur entrainent automatiquement une chaine de \n" +"clé de tri par auteur identique au nom de l'auteur. Ceci signifie que la " +"clé\n" +"de tri pour une chaine comme Acme Inc. sera Acme Inc. et non Inc., Acme" #: /home/kovid/work/calibre/resources/default_tweaks.py:76 msgid "Splitting multiple author names" @@ -21674,6 +21697,14 @@ msgid "" "For example, set it to [('authors',0),('title',0)] to sort by\n" "title within authors." msgstr "" +"Fourni un ensemble de colonnes à trier lorsque Calibre démarre\n" +"L'argument est None si l'historique de recherche sauvegardé\n" +"doit être utilisé, autrement il s'agit d'une liste de doublets\n" +"colonne, ordre. Colonne est le nom de recherche et peut être\n" +"trouvé en utilisant l'infobulle de la colonne.\n" +"L'Ordre est 0 pour ascendant, 1 pour descendant\n" +"Par exemple, mettez le à [('authors',0),('title',0)] pour trier par\n" +"titre à l'intérieur du tri par auteur." #: /home/kovid/work/calibre/resources/default_tweaks.py:140 msgid "Control how dates are displayed" @@ -22160,6 +22191,18 @@ msgid "" "Enter a comma-separated list of custom field lookup names, as in\n" "metadata_edit_custom_column_order = ['#genre', '#mytags', '#etc']" msgstr "" +"Contrôle l'ordre dans lequel les colonnes personnalisées sont listées\n" +"dans l'édition des métadonnées simple et en batch. Les colonnes listées\n" +"dans cette personnalisation sont affichées d'abord et dans l'ordre fourni.\n" +"Toute colonne non listée est affichée après celles listées, dans l'ordre\n" +"alphabétique. Notez que cette personnalisation ne change pas la taille\n" +"des fenêtres d'édition. 
Placer des fenêtres de commentaires dans cette " +"liste\n" +"peut donner des espacements de fenêtres étranges lorsque le mode 2 colonnes\n" +"est utilisé.\n" +"Entrez une liste séparée par des virgules de noms de recherche de champs\n" +"personnalisés, comme dans\n" +"metadata_edit_custom_column_order = ['#genre', '#mytags', '#etc']" #: /home/kovid/work/calibre/resources/default_tweaks.py:422 msgid "The number of seconds to wait before sending emails" diff --git a/src/calibre/translations/it.po b/src/calibre/translations/it.po index 627e597950..7609e38da2 100644 --- a/src/calibre/translations/it.po +++ b/src/calibre/translations/it.po @@ -10,15 +10,15 @@ msgstr "" "Project-Id-Version: calibre_calibre-it\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-12-16 09:06+0000\n" -"PO-Revision-Date: 2011-12-16 17:28+0000\n" +"PO-Revision-Date: 2011-12-21 18:34+0000\n" "Last-Translator: Vincenzo Reale \n" "Language-Team: Italian \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" -"X-Launchpad-Export-Date: 2011-12-17 05:01+0000\n" -"X-Generator: Launchpad (build 14525)\n" +"X-Launchpad-Export-Date: 2011-12-22 05:26+0000\n" +"X-Generator: Launchpad (build 14560)\n" "X-Poedit-Bookmarks: -1,-1,-1,-1,-1,1105,-1,1312,-1,-1\n" "Generated-By: pygettext.py 1.5\n" @@ -14856,6 +14856,8 @@ msgid "" "More specific format and device plugboards already exist. Are you sure you " "want to add the new plugboard?" msgstr "" +"Altri controlli specifici per formato e dispositivo esistono già. Sei sicuro " +"di voler aggiungere un nuovo controllo?" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugboard.py:248 msgid "The {0} device does not support the {1} format." @@ -21167,6 +21169,23 @@ msgid "" "content_server_will_display = ['*']\n" "content_server_wont_display['#mycomments']" msgstr "" +"content_server_will_display è un elenco di campi personalizzati da " +"visualizzare.\n" +"content_server_wont_display è un elenco di campi personalizzati da non " +"visualizzare.\n" +"wont_display ha priorità su will_display.\n" +"Il valore speciale '*' significa tutti i campi personalizzati. 
Il valore [] " +"significa nessuna voce.\n" +"Predefiniti:\n" +"content_server_will_display = ['*']\n" +"content_server_wont_display = []\n" +"Esempi:\n" +"Per visualizzare solo i campi personalizzati #mytags e #genre:\n" +"content_server_will_display = ['#mytags', '#genre']\n" +"content_server_wont_display = []\n" +"Per visualizzare tutti i campi eccetto #mycomments:\n" +"content_server_will_display = ['*']\n" +"content_server_wont_display['#mycomments']" #: /home/kovid/work/calibre/resources/default_tweaks.py:360 msgid "Set the maximum number of sort 'levels'" @@ -21186,7 +21205,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:368 msgid "Choose whether dates are sorted using visible fields" -msgstr "" +msgstr "Scegli se ordinare le date utilizzando campi visibili" #: /home/kovid/work/calibre/resources/default_tweaks.py:369 msgid "" @@ -21259,7 +21278,7 @@ msgstr "" #: /home/kovid/work/calibre/resources/default_tweaks.py:411 msgid "Order of custom column(s) in edit metadata" -msgstr "" +msgstr "Ordine delle colonne personalizzate nella modifica dei metadati" #: /home/kovid/work/calibre/resources/default_tweaks.py:412 msgid "" diff --git a/src/calibre/translations/uk.po b/src/calibre/translations/uk.po index 2f678e3885..c3ca543c04 100644 --- a/src/calibre/translations/uk.po +++ b/src/calibre/translations/uk.po @@ -9,7 +9,7 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME \n" "POT-Creation-Date: 2011-12-16 09:06+0000\n" -"PO-Revision-Date: 2011-12-20 21:03+0000\n" +"PO-Revision-Date: 2011-12-21 20:01+0000\n" "Last-Translator: yurchor \n" "Language-Team: Ukrainian \n" "MIME-Version: 1.0\n" @@ -17,8 +17,8 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && " "n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;\n" -"X-Launchpad-Export-Date: 2011-12-21 05:19+0000\n" -"X-Generator: Launchpad (build 14538)\n" +"X-Launchpad-Export-Date: 2011-12-22 05:26+0000\n" +"X-Generator: Launchpad (build 14560)\n" "Language: uk\n" #: /home/kovid/work/calibre/src/calibre/customize/__init__.py:56 @@ -4015,6 +4015,11 @@ msgid "" "time required. Check the download all metadata option below to enable " "downloading this data." msgstr "" +"Додаткові метадані може бути отримано зі сторінки даних книги на Overdrive. " +"Серед цих даних обмежений набір міток, що використовуються бібліотеками, " +"коментарі, назва мови та ISBN електронної книги. Типово, збирання цих даних " +"вимкнено через значний час, що витрачається на цю процедуру. Позначте пункт " +"отримання всіх даних, розташований нижче, щоб увімкнути отримання цих даних." #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/sources/ozon.py:24 msgid "Downloads metadata and covers from OZON.ru" @@ -4636,6 +4641,19 @@ msgid "" "* off: Don't modify the paragraph structure. This is useful when combined " "with Markdown or Textile formatting to ensure no formatting is lost." msgstr "" +"Структура абзацу.\n" +"Варіанти: 'auto', 'block', 'single', 'print', 'unformatted', 'off'\n" +"* auto: спробувати визначити тип абзацу автоматично.\n" +"* block: вважати порожній рядок межею абзацу.\n" +"* single: вважати кожен рядок окремим абзацом.\n" +"* print: вважати кожен з рядків, які починаються з 2 або більше пробілів або " +"табуляції, абзацами.\n" +"* unformatted: більшість рядків розірвано примусово, мало порожніх рядків та " +"відступів або повна відсутність цих рядків або відступів. 
Спробувати " +"визначити структуру та переформатувати елементи з різним форматуванням.\n" +"* off: не змінювати структури абзаців. Корисно, якщо поєднується з " +"форматуванням Markdown або Textile з метою забезпечення збереження " +"форматування." #: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:43 msgid "" @@ -4649,6 +4667,15 @@ msgid "" "* markdown: Processing using markdown formatting. To learn more about " "markdown see" msgstr "" +"Форматування у межах документа.\n" +"* auto: автоматично визначати, який обробник форматування використовувати.\n" +"* plain: не обробляти форматування документа. Вважати абзацом без " +"застосування стилів.\n" +"* heuristic: обробити з використанням евристики, щоб визначити форматування, " +"зокрема заголовки глав та курсивний текст.\n" +"* textile: обробка за допомогою форматування textile.\n" +"* markdown: обробка за допомогою форматування markdown. Щоб дізнатися більше " +"про markdown, див." #: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:53 msgid "" @@ -4734,6 +4761,11 @@ msgid "" "set and default to the color displayed by the reader (generally this is " "black)." msgstr "" +"Не вилучати даних щодо кольору шрифтів з виведених даних. Корисно, лише якщо " +"форматування виведення тексту встановлено у значення textile. Textile — " +"єдине форматування, у якому передбачено підтримку кольорів шрифтів. Якщо цей " +"параметр не вказано, шрифти кольорів не буде встановлено, типово буде " +"використано колір показу пристрою для читання (типовим кольором є чорний)." #: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:115 msgid "Send file to storage card instead of main memory by default" @@ -7285,6 +7317,11 @@ msgid "" "understanding the conversion process and figuring out the correct values for " "conversion parameters like Table of Contents and Chapter Detection." msgstr "" +"Виберіть теку для зберігання виведених діагностичних даних. Якщо вами буде " +"вказано теку, calibre розташує у ній значну частину виведених діагностичних " +"даних. Ці дані будуть корисними для розуміння процедури перетворення і " +"визначення відповідних значень для параметрів перетворення, зокрема " +"параметрів змісту та виявлення глав." #: /home/kovid/work/calibre/src/calibre/gui2/convert/debug_ui.py:59 msgid "" @@ -8145,6 +8182,12 @@ msgid "" "wizard buttons below will allow you to test your regular expression against " "the current input document." msgstr "" +"

Засіб пошуку і заміни використовує формальні вирази. Ознайомтеся з " +"підручником з " +"формальних виразів, щоб отримати початкові відомості щодо формальних " +"виразів. Крім того, натискання наведених нижче кнопок майстра надасть вам " +"змогу перевірити дію ваших формальних виразів на поточному документі з " +"вхідними даними." #: /home/kovid/work/calibre/src/calibre/gui2/convert/single.py:173 msgid "Convert" @@ -8245,6 +8288,11 @@ msgid "" "to use these options. Leave the replace field blank and enter your " "header/footer removal regexps into the search field." msgstr "" +"Параметри вилучення верхнього і нижнього колонтитулів було замінено на " +"параметри пошуку з заміною. Щоб скористатися цими параметрами, натисніть " +"пункт категорії «Пошук з заміною» на панелі ліворуч. Не заповнюйте поле " +"замінника і вкажіть формальні вирази верхнього і нижнього колонтитула у полі " +"пошуку." #: /home/kovid/work/calibre/src/calibre/gui2/convert/structure_detection_ui.py:64 msgid "Remove &fake margins" @@ -8864,6 +8912,14 @@ msgid "" "enter the full path to the file after a >>. For example:

\n" "

9788842915232 >> %s

" msgstr "" +"

Вкажіть список ISBN у полі, розташованому ліворуч, один запис на рядок. " +"calibre автоматично створить записи книг на основі ISBN і отримає метадані " +"та зображення обкладинок для цих записів.

\n" +"

Всі некоректні записи ISBN у списку буде проігноровано.

\n" +"

Крім того, ви можете вказати файл, який буде додано з кожним ISBN. Щоб " +"зробити це, вкажіть шлях до файла повністю після >>. " +"Приклад:

\n" +"

9788842915232 >> %s

" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn_ui.py:69 msgid "&Tags to set on created book entries:" @@ -9501,6 +9557,10 @@ msgid "" "Copy author sort to author for every author. You typically use this button\n" "after changing Preferences->Advanced->Tweaks->Author sort name algorithm" msgstr "" +"Копіювати впорядкування запису автора до поля автора для всіх авторів. " +"Типово, цим пунктом\n" +"слід користуватися після зміни «Налаштування -> Додатково -> Коригування -> " +"Алгоритм впорядковування імен авторів»." #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/edit_authors_dialog_ui.py:99 msgid "Copy all author sort values to author" @@ -10604,6 +10664,14 @@ msgid "" "only restores books, not any settings stored in the database, or any custom " "recipes.

Do you want to restore the database?" msgstr "" +"Ваш список книг разом з усіма метаданими книг зберігається у окремому файлі, " +"який називається базою даних. Крім того, метадані для кожної окремої книги " +"зберігаються у теці книги, як резервна копія.

За допомогою цієї дії можна " +"перебудувати базу даних на основі метаданих окремих книг. Це буде корисним, " +"якщо базу даних було пошкоджено, а список книг спорожнено. Зауважте, що під " +"час відновлення буде відновлено лише основні дані книг, а не параметри, які " +"зберігалися у базі даних, або нетипові рецепти.

Бажаєте відновити базу " +"даних?" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/restore_library.py:117 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/restore_library.py:131 @@ -11363,6 +11431,13 @@ msgid "" "windows you used to edit files in the epub.

Rebuild the ePub, " "updating your calibre library.

" msgstr "" +"

Розгорнути ePub для показу вмісту у вікні програми для керування файлами. " +"Щоб скоригувати окремі файли, клацніть правою кнопкою миші, потім виберіть " +"пункт «Відкрити за допомогою…» і позначте пункт улюбленого редактора. Після " +"завершення коригування закрийте вікно програми для керування файлами та " +"вікна редактора, яким ви скористалися для редагування файлів у " +"epub.

Повторно зберіть ePub, оновіть дані у вашій базі даних " +"calibre.

" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/tweak_epub_ui.py:62 msgid "Display contents of exploded ePub" @@ -11700,6 +11775,15 @@ msgid "" "group names for the various metadata entries are documented in " "tooltips.

" msgstr "" +"
\n" +"

Вкажіть взірець формального виразу, яким слід скористатися для визначення " +"метаданих електронної книги на основі назв файлів.

\n" +"

Ви можете скористатися підручником з формальних виразів.

\n" +"

Скористайтеся кнопкою Тест, розташованою нижче, щоб перевірити ваш " +"формальний вираз на декількох зразках назв файлів (не забудьте використати " +"суфікс назви файла). З документацією щодо назв груп для різноманітних " +"записів метаданих можна ознайомитися за допомогою панелей підказок.

" #: /home/kovid/work/calibre/src/calibre/gui2/filename_pattern_ui.py:133 msgid "Regular &expression" @@ -12500,6 +12584,11 @@ msgid "" "If the box is colored green, then text matches the individual author's sort " "strings. If it is colored red, then the authors and this text do not match." msgstr "" +"Визначає спосіб впорядкування записів авторів цієї книги. Наприклад, «Чарлз " +"Діккенс» буде впорядковано як «Діккенс, Чарлз».\n" +"Якщо поле зафарбовано зеленим, текст відповідає окремим рядкам впорядкування " +"записів авторів. Якщо поле зафарбовано червоним, запис авторів і цей текст " +"не збігаються." #: /home/kovid/work/calibre/src/calibre/gui2/metadata/basic_widgets.py:319 msgid "Author s&ort:" @@ -12808,6 +12897,10 @@ msgid "" "Using this button to create title sort will change title sort from red to " "green." msgstr "" +"Автоматично створити запис впорядкування заголовків на основі поточного " +"запису заголовка.\n" +"Використання цієї кнопки для створення впорядкування записів заголовка " +"змінить колір впорядкування заголовка з червоного на зелений." #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:130 msgid "" @@ -12816,6 +12909,11 @@ msgid "" "red to green. There is a menu of functions available under this button. " "Click and hold on the button to see it." msgstr "" +"Автоматично створити запис впорядкування запису авторів на основі поточного " +"запису авторів. Використання цієї кнопки для створення впорядкування записів " +"авторів змінить колір впорядкування записів авторів з червоного на зелений. " +"Ви можете скористатися меню керування цієї кнопкою. Наведіть вказівник миші " +"на цю кнопку, натисніть і утримуйте ліву кнопку миші, щоб відкрити це меню." #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:136 msgid "Set author sort from author" @@ -12973,6 +13071,10 @@ msgid "" "single distinctive word from the title.

To see the full log, click Show " "Details." msgstr "" +"Не вдалося знайти жодної книги, яка відповідає вказаним вами параметрам " +"пошуку. Спробуйте розширити умови пошуку. Наприклад, використовуйте " +"лише прізвище автора та одне слово з назви, яке пам’ятаєте напевно.

Щоб " +"переглянути журнал пошуку, натисніть кнопку «Показати подробиці»." #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:551 msgid "Current cover" @@ -13442,6 +13544,11 @@ msgid "" "that tell calibre what color to use. Click the Add Rule button below to get " "started.

You can change an existing rule by double clicking it." msgstr "" +"Ви можете керувати кольором стовпчиків у списку книг за допомогою створення " +"«правил», які повідомлятимуть calibre про те, яким кольором слід " +"користуватися. Натисніть кнопку «Додати правило», розташовану нижче, щоб " +"розпочати створення правила.

Ви можете змінити вже створене правило " +"подвійним клацанням на його пункті." #: /home/kovid/work/calibre/src/calibre/gui2/preferences/coloring.py:491 msgid "Add Rule" @@ -13788,6 +13895,11 @@ msgid "" "a red X.\n" "Everything else will show nothing." msgstr "" +"Показувати позначки у графічному інтерфейсі. Значення «так», «позначено» та " +"«true»\n" +"буде показано зеленою галочкою. Значення «ні», «не позначено» та «false» " +"буде показано червоним хрестиком.\n" +"Для всіх інших значень не буде показано нічого." #: /home/kovid/work/calibre/src/calibre/gui2/preferences/create_custom_column_ui.py:229 msgid "Show checkmarks" @@ -14026,6 +14138,10 @@ msgid "" "used for the subject. Also, the same templates used for \"Save to disk\" " "such as {title} and {author_sort} can be used here." msgstr "" +"Тема повідомлення електронної пошти, яке буде надіслано. Якщо не буде " +"вказано, у полі теми буде використано заголовок. Крім того, ви можете " +"скористатися тими самими шаблонами, які використовуються для «Зберегти на " +"диск», зокрема {title} і {author_sort}." #: /home/kovid/work/calibre/src/calibre/gui2/preferences/emailp.py:37 msgid "" @@ -14745,6 +14861,11 @@ msgid "" "particular book does not have some metadata, the variable will be replaced " "by the empty string." msgstr "" +"Коригуванням наведеного нижче шаблону ви можете визначити, у яких теках буде " +"збережено файли, і які назви буде цим файлам надано. Для визначення підтек " +"ви можете скористатися символом «/». Нижче наведено опис доступних змінних " +"метаданих. Якщо у певній книзі немає деяких метаданих, змінну буде замінено " +"на порожній рядок." #: /home/kovid/work/calibre/src/calibre/gui2/preferences/save_template_ui.py:59 msgid "Available variables:" @@ -15044,7 +15165,7 @@ msgstr "&Макс. к-ть пунктів OPDS на запит:" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server_ui.py:78 msgid "Max. OPDS &ungrouped items:" -msgstr "" +msgstr "Макс. к-ть пунктів OPDS &без групи:" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server_ui.py:82 msgid "Restriction (saved search) to apply:" @@ -15359,6 +15480,9 @@ msgid "" "these tweaks should be available on the website from where you downloaded " "the plugins." msgstr "" +"Додавання коригувань або внесення змін до коригувань для всіх встановлених " +"вами нетипових додатків. З документацією щодо цих коригувань ви можете " +"ознайомитися на сайті, з якого ви отримали ці додатки." #: /home/kovid/work/calibre/src/calibre/gui2/preferences/tweaks.py:324 msgid "Search for tweak" From f6bd4d17097c6358c4fecd25adc4cc24ce4eac4d Mon Sep 17 00:00:00 2001 From: Translators <> Date: Fri, 23 Dec 2011 05:34:51 +0000 Subject: [PATCH 02/74] Launchpad automatic translations update. 
--- src/calibre/translations/es.po | 8 +- src/calibre/translations/fr.po | 49 +++++++-- src/calibre/translations/uk.po | 188 ++++++++++++++++++++++++++++++++- 3 files changed, 233 insertions(+), 12 deletions(-) diff --git a/src/calibre/translations/es.po b/src/calibre/translations/es.po index 969e8ef683..d7d4a1ecbd 100644 --- a/src/calibre/translations/es.po +++ b/src/calibre/translations/es.po @@ -11,15 +11,15 @@ msgstr "" "Project-Id-Version: es\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-12-16 09:06+0000\n" -"PO-Revision-Date: 2011-12-16 14:00+0000\n" +"PO-Revision-Date: 2011-12-22 10:25+0000\n" "Last-Translator: Jellby \n" "Language-Team: Spanish\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" -"X-Launchpad-Export-Date: 2011-12-17 05:09+0000\n" -"X-Generator: Launchpad (build 14525)\n" +"X-Launchpad-Export-Date: 2011-12-23 05:34+0000\n" +"X-Generator: Launchpad (build 14560)\n" #: /home/kovid/work/calibre/src/calibre/customize/profiles.py:527 msgid "" @@ -5857,7 +5857,7 @@ msgstr "Editar metadatos por separado" #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:35 msgid "Edit metadata in bulk" -msgstr "Edita metadatos en bloque" +msgstr "Editar metadatos en bloque" #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:38 msgid "Download metadata and covers" diff --git a/src/calibre/translations/fr.po b/src/calibre/translations/fr.po index cec518fc25..7bbc91e219 100644 --- a/src/calibre/translations/fr.po +++ b/src/calibre/translations/fr.po @@ -8,14 +8,14 @@ msgstr "" "Project-Id-Version: calibre 0.4.22\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-12-16 09:06+0000\n" -"PO-Revision-Date: 2011-12-21 19:46+0000\n" +"PO-Revision-Date: 2011-12-22 20:48+0000\n" "Last-Translator: sengian \n" "Language-Team: Français \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n > 1;\n" -"X-Launchpad-Export-Date: 2011-12-22 05:25+0000\n" +"X-Launchpad-Export-Date: 2011-12-23 05:34+0000\n" "X-Generator: Launchpad (build 14560)\n" "X-Poedit-Bookmarks: 1177,1104,-1,-1,-1,-1,-1,-1,-1,-1\n" "Generated-By: pygettext.py 1.5\n" @@ -12949,7 +12949,7 @@ msgid "" " The green color indicates that the current title sort matches the current " "title" msgstr "" -" La couleur verte indique que la clé de tri de titre actuelle correspond au " +" La couleur verte indique que la clé de tri par titre actuelle correspond au " "titre actuel." #: /home/kovid/work/calibre/src/calibre/gui2/metadata/basic_widgets.py:151 @@ -12957,7 +12957,7 @@ msgid "" " The red color warns that the current title sort does not match the current " "title. No action is required if this is what you want." msgstr "" -" La couleur rouge indique que la clé de tri de titre actuelle ne correspond " +" La couleur rouge indique que la clé de tri par titre actuelle ne correspond " "pas au titre actuel. Aucune action n'est requise si c'est ce que vous voulez." #: /home/kovid/work/calibre/src/calibre/gui2/metadata/basic_widgets.py:225 @@ -13295,9 +13295,9 @@ msgid "" "Using this button to create title sort will change title sort from red to " "green." 
msgstr "" -"Crée automatiquement la clé de tri de titre à partir de la valeur actuelle " +"Crée automatiquement la clé de tri par titre à partir de la valeur actuelle " "de l'entrée titre.\n" -"L'utilisation de ce bouton pour créer la clé de tri de titre changera la " +"L'utilisation de ce bouton pour créer la clé de tri par titre changera la " "couleur de ce champ de rouge à vert." #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:130 @@ -21618,6 +21618,26 @@ msgid "" "categories_use_field_for_author_name = 'author'\n" "categories_use_field_for_author_name = 'author_sort'" msgstr "" +"Choisir le champ d'auteur à afficher dans le panneau d'étiquettes (la liste " +"des auteurs,\n" +"la série, l'éditeur, etc du coté gauche). Les choix sont auteur et clé de " +"tri\n" +"par auteur. Cette personnalisation affecte seulement ce qui est affiché " +"sous\n" +"la catégorie auteur dans le panneau d'étiquettes et le serveur de contenu.\n" +"Veuillez noter que si vous choisissez la clé de tri par auteur, il est très " +"possible\n" +"que des doublons apparaissent dans la liste des noms car bien que les noms " +"des\n" +"auteurs soient uniques, il n'y a pas la même garantie pour la clé de tri par " +"auteur.\n" +"L'affichage des doublons ne va pas causer de problèmes, mais peut entrainer\n" +"une certaine confusion. Lorsque 'author_sort' est utilisé, l'infobulle " +"affichera le\n" +"nom de l'auteur.\n" +"Exemples :\n" +"categories_use_field_for_author_name = 'author'\n" +"categories_use_field_for_author_name = 'author_sort'" #: /home/kovid/work/calibre/resources/default_tweaks.py:99 msgid "" @@ -21811,6 +21831,23 @@ msgid "" "will become \"Lord of the Rings, The\". If the tweak is set to\n" "strictly_alphabetic, it would remain \"The Lord of the Rings\"." msgstr "" +"Contrôle comment le titre et la série sont formatés lors de la sauvegarde\n" +"sur le disque/l'envoi à l'appareil. Ce comportement dépend du champ traité.\n" +"Si il s'agit du titre, alors si cette personnalisation est fixé à " +"'library_order',\n" +"le titre sera remplacé par la clé de tri par titre. Si elle est fixée à " +"'strictly_alphabetic',\n" +"alors le titre ne sera pas modifié. Si il s'agit de la série, alors si elle " +"est fixée à\n" +"'library_order', les articles comme 'The' (Le) et 'An' (Un) seront déplacés " +"à la fin.\n" +"Si elle est fixée à 'strictly_alphabetic', alors la série sera envoyée sans " +"modification.\n" +"Par exemple, si cette personnalisation est fixée à library_order, \"The Lord " +"of the Rings\"\n" +"deviendra \"Lord of the Rings, The\". Si elle est fixée à " +"strictly_alphabetic, cela\n" +"restera \"The Lord of the Rings\"." #: /home/kovid/work/calibre/resources/default_tweaks.py:201 msgid "Set the list of words considered to be \"articles\" for sort strings" diff --git a/src/calibre/translations/uk.po b/src/calibre/translations/uk.po index c3ca543c04..b259db31f2 100644 --- a/src/calibre/translations/uk.po +++ b/src/calibre/translations/uk.po @@ -9,7 +9,7 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME \n" "POT-Creation-Date: 2011-12-16 09:06+0000\n" -"PO-Revision-Date: 2011-12-21 20:01+0000\n" +"PO-Revision-Date: 2011-12-22 18:57+0000\n" "Last-Translator: yurchor \n" "Language-Team: Ukrainian \n" "MIME-Version: 1.0\n" @@ -17,7 +17,7 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && " "n%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2;\n" -"X-Launchpad-Export-Date: 2011-12-22 05:26+0000\n" +"X-Launchpad-Export-Date: 2011-12-23 05:34+0000\n" "X-Generator: Launchpad (build 14560)\n" "Language: uk\n" @@ -5929,6 +5929,14 @@ msgid "" "and subsequently selected books will be permanently deleted from your " "calibre library.

Are you sure you want to proceed?" msgstr "" +"Дані форматів позначених книг буде об’єднано до першої позначеної " +"книги (%s). Метадані у першій позначеній книзі не буде змінено. Дані " +"щодо авторів, заголовків, ISBN та всі інші метадані не буде " +"об’єднано.

Після об’єднання другу і всі інші позначені книги, з усіма " +"метаданими, що у них містяться, буде вилучено.

Всі формати " +"першої позначеної книги не буде вилучено, всі ж формати-дублікати другої та " +"інших позначених книг буде остаточно вилучено з вашої бібліотеки " +"calibre.

Ви справді бажаєте, що усі ці дії було виконано?" #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:303 #, python-format @@ -5941,6 +5949,13 @@ msgid "" "books will be permanently deleted from your calibre library.

" "Are you sure you want to proceed?" msgstr "" +"Дані форматів та метадані позначених книг буде об’єднано до першої " +"позначеної книги (%s). Дані ISBN не буде об’єднано.

Після " +"об’єднання другу і всі інші позначені книги буде вилучено. " +"

Всі формати першої позначеної книги не буде вилучено, всі ж формати-" +"дублікати другої та інших позначених книг буде остаточно вилучено з " +"вашої бібліотеки calibre.

Ви справді бажаєте, що усі ці дії " +"було виконано?" #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:473 msgid "Applying changed metadata" @@ -6275,6 +6290,13 @@ msgid "" "especially if the book you are buying has DRM." msgstr "" +"Всі фінансові операції (сплати тощо) виконуються між вами та продавцем книг. " +"Calibre не виконує жодної обробки даних протягом цих процедур, отже всі " +"проблеми, пов’язані з купівлею, слід обговорювати з адміністрацією сайта, на " +"якому ви купуєте книгу. Переконайтеся, що всі придбані вами книги можна буде " +"читати на вашому пристрої для читання електронних книг, особливо якщо книгу, " +"яку ви купуєте, захищено DRM." #: /home/kovid/work/calibre/src/calibre/gui2/actions/store.py:152 msgid "Show this message again" @@ -7412,6 +7434,19 @@ msgid "" "size-rescaling\">User Manual for a discussion of how font size rescaling " "works.

" msgstr "" +"

За допомогою цієї програми-майстра ви зможете вибрати потрібні вами " +"розміри шрифтів. Просто вкажіть основний розмір шрифту у початковому " +"документі, а потім вкажіть початковий розмір шрифту. Програма покаже " +"відповідники розмірів шрифтів, отримані у результаті застосування алгоритму " +"зміни масштабу. Ви можете скоригувати алгоритм зміною розміру основного " +"шрифту результату та ключа шрифту нижче. Після визначення бажаного значення " +"натисніть кнопку «Гаразд».

\n" +"

Типово, якщо розмір основного шрифту результату є нульовим і/або не " +"вказано ключового розміру шрифту, calibre використає значення з поточного " +"профілю виведення даних.

\n" +"

Обговорення алгоритмів зміни масштабу шрифтів можна знайти у підручнику користувача.

" #: /home/kovid/work/calibre/src/calibre/gui2/convert/font_key_ui.py:108 msgid "&Output document" @@ -7486,6 +7521,15 @@ msgid "" "the User Manual." msgstr "" +"Евристична обробка — це обробка, протягом якої calibre виконає пошук " +"типових взірців у вашій книзі і спробує їх виправити. Як можна зрозуміти з " +"назви, під час обробки використовуються певні припущення, що може призвести " +"до погіршення якості результату перетворення, якщо припущення calibre не " +"справдяться. Тому цей режим типово вимкнено. Часто, якщо результати " +"перетворення вас не задовольняють, вмикання евристики може значно покращити " +"справи. Докладніше про різноманітні аспекти використання евристики можна " +"дізнатися з підручника користувача." #: /home/kovid/work/calibre/src/calibre/gui2/convert/heuristics_ui.py:114 msgid "Enable &heuristic processing" @@ -8529,6 +8573,13 @@ msgid "" "tag.

To learn more advanced usage of XPath see the XPath Tutorial." msgstr "" +"

Наприклад, щоб було знайдено всі теґи h2, для яких class=\"chapter\", " +"встановіть для теґу значення h2, для атрибута — значення class " +"і для значення — значення chapter.

Якщо поле атрибута не буде " +"заповнено, програма встановить відповідність для всіх атрибутів, а якщо не " +"буде вказано значення — для всіх значень. Значення теґу «*» відповідає будь-" +"якому теґу.

Докладніше про використання XPath можна дізнатися з настанов щодо XPath." #: /home/kovid/work/calibre/src/calibre/gui2/cover_flow.py:145 msgid "Browse by covers" @@ -9730,6 +9781,15 @@ msgid "" "checked, the search text must match exactly. If it is unchecked, the search " "text will match both upper- and lower-case letters" msgstr "" +"У режимі звичайних символів програма виконуватиме у полі пошук звичайного " +"набору символів, введених користувачем у поле пошуку. Всюди, де цей набір " +"буде знайдено у вказаному полі, його буде замінено на вказаний текстовий " +"фрагмент заміни. Після завершення заміни регістр символів зміненого " +"фрагмента тексту може бути виправлено на великі або малі, програма може " +"зробити великими лише перші літери кожного слова. Якщо буде позначено пункт " +"врахування регістру, регістр символів для пошуку має точно збігатися з " +"регістром символів знайденого тексту. Якщо пункт не буде позначено, пошук " +"відбуватиметься без врахування регістру." #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_bulk.py:467 msgid "" @@ -14996,6 +15056,11 @@ msgid "" "search not just authors but title/tags/series/comments/etc. Use these " "options if you would like to change this behavior." msgstr "" +"Якщо буде вказано ключове слово пошуку без префіксу, типово calibre " +"виконуватиме пошук у всіх метаданих. Наприклад, пошук за рядком «азімов» " +"виконуватиметься не лише у полі авторів, але і у полях назв книг, мітках, " +"назва серій, коментарях тощо. Скористайтеся цими пунктами, якщо ви бажаєте " +"змінити цю поведінку." #: /home/kovid/work/calibre/src/calibre/gui2/preferences/search_ui.py:114 msgid "&Limit the searched metadata" @@ -15105,6 +15170,13 @@ msgid "" "

  • Automatic management: Calibre automatically keeps metadata on the " "device in sync with the calibre library, on every connect
  • " msgstr "" +"
  • Керування вручну: Calibre оновлює метадані і додає збірки лише " +"під час надсилання книги. Якщо буде вибрано цей варіант, calibre ніколи не " +"вилучатиме збірки.
  • \n" +"
  • Лише під час надсилання: Calibre оновлює метадані і додає або " +"вилучає збірки для книги лише під час надсилання на пристрій.
  • \n" +"
  • Автоматичне керування: Calibre автоматично синхронізує метадані " +"на пристрої з бібліотекою calibre під час кожного з’єднання.
  • " #: /home/kovid/work/calibre/src/calibre/gui2/preferences/sending_ui.py:64 msgid "" @@ -15177,6 +15249,10 @@ msgid "" "content server makes available to those matching the search. This setting is " "per library (i.e. you can have a different restriction per library)." msgstr "" +"Це обмеження (засноване на збереженому пошуку) скорочує список книг, доступ " +"до яких надає сервер даних, до книг, які відповідають критерію пошуку. " +"Значення цього параметра стосується окремої бібліотеки (тобто ви можете " +"визначати різні обмеження для різних бібліотек)." #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server_ui.py:95 msgid "&Start Server" @@ -15782,6 +15858,9 @@ msgid "" "the store caters to. However, this does not necessarily mean that the store " "is limited to that market only." msgstr "" +"Цю крамницю розташовано у %s. Розташування вказує на те, який ринок " +"обслуговується крамницею. Це, втім, не означає, що послуги крамниці " +"обмежується лише відповідним ринком." #: /home/kovid/work/calibre/src/calibre/gui2/store/config/chooser/models.py:143 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:249 @@ -15934,6 +16013,11 @@ msgid "" "what you can do with this book. Check with the store before making any " "purchases to ensure you can actually read this book." msgstr "" +"Користування цієї книгою обмежено DRM. Можливо, цю книгу не можна буде " +"прочитати на вашому пристрої для читання. Крім того, ваші дії з цією книгою " +"може бути обмежено. Ознайомтеся з правилами крамниці щодо поводження з " +"придбаними книгами до купівлі, щоб переконатися, що ви зможете прочитати " +"придбану книгу." #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:239 msgid "" @@ -15942,6 +16026,12 @@ msgid "" "conversion. However, before making a purchase double check the DRM status " "with the store. The store may not be disclosing the use of DRM." msgstr "" +"Виявлено, що книгу не захищено DRM. Ймовірно, ви зможете прочитати книгу на " +"будь-якому пристрої для читання електронних книг, якщо підтримку " +"перетворення книг у цьому форматі передбачено у calibre. Втім, вам варто " +"переконатися у тому, що книгу справді не захищено DRM на сайті крамниці, до " +"придбання цієї книги. Можливо, крамниця просто не повідомляє про " +"використання DRM." #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:241 msgid "" @@ -16050,6 +16140,14 @@ msgid "" "will be a .epub file. You can add this book to calibre using \"Add Books\" " "and selecting the file from the ADE library folder." msgstr "" +"Ця книга зберігається у форматі захищеного DRM файла EPUB. Програма " +"попросить вас підтвердити зберігання цього файла на вашому комп’ютері. Щойно " +"книгу буде збережено, відкрийте книгу за допомогою Adobe Digital " +"Editions (ADE).

    Після цього ADE отримає справжні дані електронної " +"книги, тобто дані файла .epub. Ви можете додати цю книгу до бібліотеки " +"calibre за допомогою пункту «Додати книги» з наступним вибором файла з теки " +"бібліотеки ADE." #: /home/kovid/work/calibre/src/calibre/gui2/store/web_control.py:88 msgid "File is not a supported ebook type. Save to disk?" @@ -17233,6 +17331,12 @@ msgid "" "button below. You will also have to register your gmail address in your " "Amazon account." msgstr "" +"

    calibre може надсилати книги на ваш пристрій Kindle за допомогою " +"електронної пошти у автоматичному режимі. Щоб програма мала змогу робити це, " +"вам слід налаштувати надсилання нижче. Найпростіший спосіб: вкажіть " +"безкоштовний обліковий запис gmail і " +"натисніть розташовану нижче кнопку «Використовувати gmail». Вам також слід " +"зареєструвати вашу адресу gmail у вашому обліковому записі Amazon." #: /home/kovid/work/calibre/src/calibre/gui2/wizard/kindle_ui.py:50 msgid "&Kindle email:" @@ -17314,6 +17418,10 @@ msgid "" "your %s email address to the allowed email addresses in your Amazon.com " "Kindle management page." msgstr "" +"Якщо ви плануєте користуватися електронною поштою для надсилання книг на ваш " +"пристрій Kindle, не забудьте додати вашу електронну адресу %s до списку " +"дозволених адрес електронної пошти на сторінці керування Kindle на " +"Amazon.com." #: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:95 msgid "Setup" @@ -17346,6 +17454,11 @@ msgid "" "verify your account periodically, before it will let calibre send email. In " "this case, I strongly suggest you setup a free gmail account instead." msgstr "" +"Якщо ви налаштовуєте новий обліковий запис hotmail, вам слід знати, що " +"Microsoft вимагає періодичної перевірки вашого облікового запису до того, як " +"надсилання книг за допомогою calibre стане можливим. Якщо ви не можете " +"скористатися надсиланням через це, наполегливо рекомендуємо вам " +"користуватися безкоштовним обліковим записом gmail." #: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:221 #: /home/kovid/work/calibre/src/calibre/gui2/wizard/send_email.py:232 @@ -17495,6 +17608,11 @@ msgid "" "directly on the device. To do this you have to turn on the calibre content " "server." msgstr "" +"

    Якщо ви користуєтеся програмою для читання електронних книг Stanza на вашому " +"iPhone/iTouch, ви зможете отримати доступ до вашої збірки книг calibre " +"безпосередньо з пристрою. Доступ до збірки можна буде отримати лише після " +"вмикання сервера даних calibre." #: /home/kovid/work/calibre/src/calibre/gui2/wizard/stanza_ui.py:50 msgid "Turn on the &content server" @@ -17582,6 +17700,13 @@ msgid "" "Default: '%%default'\n" "Applies to: CSV, XML output formats" msgstr "" +"Поля, дані яких слід виводити під час каталогізації книг у базі даних. Слід " +"визначати у форматі відокремленого комами списку полів.\n" +"Можливі поля: %(fields)s,\n" +"і всі створені користувачем поля.\n" +"Приклад: %(opt)s=title,authors,tags\n" +"Типові поля: «%%default»\n" +"Застосування: формати виведення CSV, XML" #: /home/kovid/work/calibre/src/calibre/library/catalog.py:69 #, python-format @@ -17607,6 +17732,13 @@ msgid "" "Default: '%%default'\n" "Applies to: BIBTEX output format" msgstr "" +"Поля, дані яких слід виводити під час каталогізації книг у базі даних. Слід " +"визначати у форматі відокремленого комами списку полів.\n" +"Можливі поля: %(fields)s,\n" +"і всі створені користувачем поля.\n" +"Приклад: %(opt)s=title,authors,tags\n" +"Типові поля: «%%default»\n" +"Застосування: формат виведення BIBTEX" #: /home/kovid/work/calibre/src/calibre/library/catalog.py:265 #, python-format @@ -17656,6 +17788,11 @@ msgid "" "Default: '%%default'\n" "Applies to: BIBTEX output format" msgstr "" +"Шаблон для створення посилань на основі полів бази даних.\n" +"Має формат шаблону з полями у фігурних дужках, {}.\n" +"Можливі поля: %s.\n" +"Типове значення: «%%default»\n" +"Застосування: формат виведення BIBTEX" #: /home/kovid/work/calibre/src/calibre/library/catalog.py:302 #, python-format @@ -18255,6 +18392,8 @@ msgid "" "This column stores tag like data (i.e. multiple comma separated values). " "Only applies if datatype is text." msgstr "" +"У цьому стовпчику зберігаються дані, подібні до міток (тобто значення, " +"відокремлені комами). Застосовується, лише якщо типом даних є «text»." #: /home/kovid/work/calibre/src/calibre/library/cli.py:588 msgid "" @@ -18284,6 +18423,10 @@ msgid "" "If declared, --search is ignored.\n" "Default: all" msgstr "" +"Список значень ідентифікаторів бази даних, відокремлених комами, для " +"каталогізації.\n" +"Якщо визначено, --search буде проігноровано.\n" +"Типове значення: всі значення" #: /home/kovid/work/calibre/src/calibre/library/cli.py:681 msgid "" @@ -18365,6 +18508,12 @@ msgid "" " columns with the custom_columns command.\n" " " msgstr "" +"\n" +" %prog remove_custom_column [параметри] мітка\n" +"\n" +" Вилучити нетиповий стовпчик, визначений міткою. Переглянути список\n" +" доступних стовпчиків можна за допомогою команди custom_columns.\n" +" " #: /home/kovid/work/calibre/src/calibre/library/cli.py:822 msgid "Do not ask for confirmation" @@ -18386,6 +18535,17 @@ msgid "" " replaced.\n" " " msgstr "" +"\n" +" %prog saved_searches [параметри] список\n" +" %prog saved_searches add назва пошук\n" +" %prog saved_searches remove назва\n" +"\n" +" Керування збереженими записами пошуків, що зберігаються у цій базі " +"даних.\n" +" Якщо ви спробуєте додати запит з назвою, яку вже записано до бази " +"даних,\n" +" попередній запис буде замінено новим.\n" +" " #: /home/kovid/work/calibre/src/calibre/library/cli.py:860 msgid "Error: You must specify an action (add|remove|list)" @@ -18500,6 +18660,10 @@ msgid "" "Produce a report of the category information in the database. 
The\n" "information is the equivalent of what is shown in the tags pane.\n" msgstr "" +"%prog list_categories [параметри]\n" +"\n" +"Створити звіт щодо даних категорій у базі даних. Отримані дані\n" +"еквівалентні до даних, показаних на панелі міток.\n" #: /home/kovid/work/calibre/src/calibre/library/cli.py:1052 msgid "" @@ -19111,6 +19275,10 @@ msgid "" "and consume more resources. Most tasks like conversion/news download/adding " "books/etc. are affected by this setting." msgstr "" +"Пріоритет процесів обробки. Вище значення пріоритету означає, що обробка " +"відбуватиметься швидше зі споживанням більшого об’єму ресурсів. Значенням " +"цього параметра керуються більшість процесів, зокрема процеси перетворення, " +"отримання новин, додавання книг тощо." #: /home/kovid/work/calibre/src/calibre/utils/config_base.py:399 msgid "Swap author first and last names when reading metadata" @@ -19749,6 +19917,9 @@ msgid "" "passed in lang_strings. The strings must be in the language of the current " "locale. Lang_strings is a comma-separated list." msgstr "" +"language_codes(рядки_мов) — повертає коди мов для рядків, переданих як " +"рядки_мов. Рядками мають бути записи у поточній локалі. Окремі записи у " +"параметрі рядки_мов слід відокремлювати комами." #: /home/kovid/work/calibre/src/calibre/utils/ipc/job.py:43 msgid "Waiting..." @@ -20822,6 +20993,9 @@ msgid "" "Set this to True to ensure that tags in 'Tags to add when adding\n" "a book' are added when copying books to another library" msgstr "" +"Встановіть значення True, щоб мітки з поля «Мітки, які слід додавати\n" +"під час додавання книги» було додано під час копіювання книг до\n" +"іншої бібліотеки." #: /home/kovid/work/calibre/resources/default_tweaks.py:339 msgid "Set the maximum number of tags to show per book in the content server" @@ -20951,6 +21125,11 @@ msgid "" "two\n" "columns. If False, one column is used." msgstr "" +"Визначає скільки стовпчиків слід використовувати під час редагування " +"нетипових\n" +"метаданих окремої книги. Якщо True, поля буде розподілено між двома " +"стовпчиками.\n" +"Якщо значенням буде False, використовуватиметься один стовпчик." #: /home/kovid/work/calibre/resources/default_tweaks.py:411 msgid "Order of custom column(s) in edit metadata" @@ -21003,6 +21182,11 @@ msgid "" "to fit within this size. This is to prevent slowdowns caused by extremely\n" "large covers" msgstr "" +"Розміри всіх зображень обкладинок у бібліотеці calibre буде змінено так, " +"щоб\n" +"вони не перевищували цих розмірів, зі збереженням співвідношення розмірів.\n" +"Призначено для того, щоб запобігти уповільненню, спричиненому завантаженням\n" +"дуже великих зображень обкладинок." #: /home/kovid/work/calibre/resources/default_tweaks.py:442 msgid "Where to send downloaded news" From cb4ec8cd398ce690231630acb59a41d1f84c9ec9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 23 Dec 2011 22:13:24 +0530 Subject: [PATCH 03/74] Fix regression in 0.8.32 that broke deleting some books with non ascii characters in the title/author. 
Fixes #908068 (Error deleting ebooks with utf8 encode (calibre 0.8.32-linux))
---
 src/calibre/utils/linux_trash.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/calibre/utils/linux_trash.py b/src/calibre/utils/linux_trash.py
index 84d35d71b7..9c1559435b 100644
--- a/src/calibre/utils/linux_trash.py
+++ b/src/calibre/utils/linux_trash.py
@@ -36,6 +36,11 @@ uid = os.getuid()
 TOPDIR_TRASH = '.Trash'
 TOPDIR_FALLBACK = '.Trash-%s'%uid
 
+def uniquote(raw):
+    if isinstance(raw, unicode):
+        raw = raw.encode('utf-8')
+    return quote(raw).decode('utf-8')
+
 def is_parent(parent, path):
     path = op.realpath(path) # In case it's a symlink
     parent = op.realpath(parent)
@@ -53,7 +58,7 @@ def info_for(src, topdir):
         src = op.relpath(src, topdir)
     info = "[Trash Info]\n"
-    info += "Path=" + quote(src) + "\n"
+    info += "Path=" + uniquote(src) + "\n"
     info += "DeletionDate=" + format_date(datetime.now()) + "\n"
     return info

From ffd28fe2c6b51a795adaf9a1ad44c0b53d0e1aa2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 24 Dec 2011 09:40:59 +0530
Subject: [PATCH 04/74] ...

---
 Changelog.old.yaml | 4210 ++++++++++++++++++++++++++++++++++++++++++++
 Changelog.yaml     | 4210 --------------------------------------------
 2 files changed, 4210 insertions(+), 4210 deletions(-)

diff --git a/Changelog.old.yaml b/Changelog.old.yaml
index 0bdd7ba746..0e601874a5 100644
--- a/Changelog.old.yaml
+++ b/Changelog.old.yaml
@@ -1,3 +1,4213 @@
+- version: 0.7.59
+  date: 2011-04-30
+
+  bug fixes:
+    - title: "Fixes a bug in 0.7.58 that caused too-small fonts when converting to MOBI for the Kindle. Apologies."
+
+    - title: "Apple driver: Handle invalid EPUBs that do not contain an OPF file"
+
+  new recipes:
+    - title: The Big Picture and Auto industry news
+      author: welovelucy
+
+    - title: Gazeta Prawna
+      author: Vroo
+
+    - title: Various Czech news sources
+      author: Tomas Latal
+
+    - title: Diario de Ibiza
+      author: Joan Tur
+
+- version: 0.7.58
+  date: 2011-04-29
+
+  new features:
+    - title: "Support for converting and reading metadata from Plucker format PDB files"
+      type: major
+
+    - title: "The metadata that is displayed in the book details panel on the right is now completely configurable via Preferences->Look & Feel"
+
+    - title: "Add a column that shows the date when the metadata of a book record was last modified in calibre. To see the column, right click on the column headers in calibre and select Show column->Modified. Note that the dates may be incorrect for books added with older versions of calibre."
+
+    - title: "Add a command line option to shut down a running calibre"
+
+    - title: "CHM Input: Store extracted files in the input/ sub dir for easy debugging when --debug-pipeline is specified"
+
+    - title: "Add a popup menu to the 'Create saved search' button to allow easy deleting of saved searches"
+
+  bug fixes:
+    - title: "Fix regression that broke converting to LIT in 0.7.57"
+      tickets: [769334]
+
+    - title: "Conversion pipeline: Remove encoding declarations from input HTML documents to guarantee that there is only a single encoding declaration in the output HTML."
+      tickets: [773337]
+
+    - title: "Correctly parenthesize searches that are used to make search restrictions"
+
+    - title: "Fix ratings in save to disk templates not being divided by 2"
+
+    - title: "TXT to EPUB: Underlined words (following quotes?) fail to become italics"
+      tickets: [772267]
+
+    - title: "Fix template function source code unavailable when not running calibre from source"
+
+    - title: "Fix adding HTML books from the top of a deep folder hierarchy being very slow"
+
+    - title: "Only set language in MOBI metadata if it is not null"
+
+    - title: "Fix 'count-of' searches (e.g., tags:#>3)."
+      tickets: [771175]
+
+    - title: "Fix regression that broke connection to iTunes in some cases"
+      tickets: [771164]
+
+    - title: "Fix buggy regex that made converting PDFs with the string ****************** very slow"
+      tickets: [770534]
+
+    - title: "Fix Ctrl+L shortcut to look up a word not working in ebook viewer"
+      tickets: [769492]
+
+    - title: "Fix regression that broke searching on boolean columns"
+
+  improved recipes:
+    - HBR Blogs
+    - The Marker
+    - Financial Times
+    - Clarin
+    - Honolulu Star Advertiser
+
+  new recipes:
+    - title: Novi Standard
+      author: Darko Miletic
+
+    - title: Autobild.ro and Social Diva
+      author: Silviu Cotoara
+
+    - title: Novinky
+      author: Tomas Latal
+
+    - title: "De Volksrant (subscriber version)"
+      author: Selcal
+
+- version: 0.7.57
+  date: 2011-04-22
+
+  new features:
+    - title: "Launch worker processes on demand instead of keeping a pool of them in memory. Reduces memory footprint."
+
+    - title: "Use the visual formatting of the Table of Contents to try to automatically create a multi-level TOC when converting/viewing MOBI files."
+      tickets: [763681]
+
+    - title: "Add a new function booksize() to the template language to get the value of the size column in calibre."
+
+    - title: "Add support for using metadata plugboards with the content server (only with the epub format)"
+
+    - title: "Change default algorithm for automatically computing author sort to be more intelligent and handle the case when the author name has a comma in it"
+
+    - title: "Show cover size in the tooltips of the book details panel and book details popup window"
+
+  bug fixes:
+    - title: "Dragging and dropping a cover onto the book details panel did not change the cover size"
+      tickets: [768332]
+
+    - title: "Fix non-escaped '|' when searching for commas in authors using REGEXP_MATCH"
+
+    - title: "Fix ratings in templates being multiplied by 2"
+
+    - title: "Fix adding a comma to custom series values when using completion."
+      tickets: [763788]
+
+    - title: "CHM Input: Another workaround for a Microsoft mess."
+      tickets: [763336]
+
+    - title: "Fix job count in the spinner not always being updated when a job completes"
+
+    - title: "Changing case only of a title does not update title sort"
+      tickets: [768904]
+
+  improved recipes:
+    - ecuisine.ro, egirl.ro and tabu.ro
+    - Daily Telegraph
+    - Handelsblatt
+    - Il Sole 24 Ore
+    - Newsweek
+    - Arcamax
+
+  new recipes:
+    - title: BabyOnline.ro
+      author: Silviu Cotoara
+
+    - title: "The Journal.ie"
+      author: Phil Burns
+
+    - title: "Der Spiegel"
+      author: Nikolas Mangold
+
+- version: 0.7.56
+  date: 2011-04-17
+
+  new features:
+    - title: "This is primarily a bug fix release that fixes a bug in 0.7.55 that caused calibre to rescan the files on the device every time the device is connected. If you updated to 0.7.55, it is highly recommended that you update to 0.7.56"
+
+    - title: "Device driver for Coby Kyros"
+
+    - title: "Remove the quick access to search options from next to the search bar, as we now have a separate search highlights toggle button"
+
+    - title: "MOBI Output: Ensure that MOBI files always have 8KB worth of null bytes at the end of record 0. This appears to be necessary for Amazon to be able to add DRM to calibre generated MOBI files sent to their publishing service."
+
+    - title: "Add a tool to inspect MOBI files. To use: calibre-debug -m file.mobi"
+
+  bug fixes:
+    - title: "Fixed regression that caused calibre to rescan files on the device on every reconnect"
+
+    - title: "Fix donate button causing the toolbar to be too large on OS X"
+
+    - title: "MOBI Input: Fix detection of Table of Contents for MOBI files that have a page break between the location designated as the Table of Contents and the actual table of contents."
+      tickets: [763504]
+
+    - title: "Comic Input: Fix handling of some CBZ files that have wrongly encoded non ASCII filenames on windows."
+      tickets: [763280]
+
+    - title: "PML Input: Fix multi-line chapter title causing a spurious page break"
+      tickets: [763238]
+
+    - title: "EPUB Input: Speed up processing of files with very large manifest/spines"
+
+    - title: "Fix regression that broke cover:False searches in 0.7.55"
+
+  improved recipes:
+    - Sueddeutsche Zeitung
+    - Irish Times
+    - Big Oven
+    - NSPM
+
+- version: 0.7.55
+  date: 2011-04-15
+
+  new features:
+    - title: "Add a menu bar. Useful if you use a lot of plugins and are running out of space in your toolbars. By default the menu bar is hidden (except on OS X). You can add actions to it via Preferences->Toolbars. As soon as you add actions, it will become visible."
+
+    - title: "OS X: Make the main calibre window look a little more 'native' on OS X"
+
+    - title: "Show recently viewed books in the View button's drop down menu"
+
+    - title: "Add a button next to the search bar to toggle easily between highlight and restrict search modes"
+
+    - title: "Allow the use of arbitrary searches as search restrictions, rather than just saved searches. Do this by using the special entry '*Current Search' in the Search Restriction dropdown."
+
+    - title: "The Connect/share icon now changes color to indicate that the content server is running"
+      tickets: [755444]
+
+    - title: "Device drivers for Viewpad 7, Motorola Xoom and Asus Eee Note"
+
+    - title: "Add tags like composite custom column."
+      tickets: [759663]
+
+    - title: "Add a new date format code 'iso'. Permits formatting dates to see the complete time (via Preferences->Tweaks)"
+
+    - title: "Allow the use of data from the size column in the template language"
+      tickets: [759645]
+
+    - title: "Support reading/writing covers to txtz/htmlz files"
+
+    - title: "Speedup for large library sorting when using composite custom columns"
+
+    - title: "Move the 'boolean columns are tristate' tweak to Preferences->Behavior"
+
+  bug fixes:
+    - title: "Fix a regression in 0.7.54 that broke reading covers/metadata from cbz files."
+      tickets: [756892]
+
+    - title: "Fix tweak names and help not translatable"
+      tickets: [756736]
+
+    - title: "When the size of a book is less than 0.1MB but not zero, display the size as <0.1 instead of 0.0."
+      tickets: [755768]
+
+    - title: "HTMLZ input: Fix handling of HTML files encoded in an encoding other than UTF-8"
+
+    - title: "EPUB Input: Fix EPUB files with empty Adobe PAGE templates causing conversion to abort."
+      tickets: [760390]
+
+    - title: "Fix CHM input plugin not closing opened input file"
+      tickets: [760589]
+
+    - title: "MOBI Output: Make super/subscripts use a slightly smaller font when rendered on a Kindle. Also allow the use of vertical-align:top/bottom in the CSS to specify a super/subscript."
+      tickets: [758667]
+
+    - title: "LRF Input: Detect and work around LRF files that have deeply nested spans, instead of crashing."
+      tickets: [759680]
+
+    - title: "MOBI Output: Fix bug that would cause conversion to unnecessarily abort when malformed hyperlinks are present in the input document."
+      tickets: [759313]
+
+    - title: "Make true and false searches work correctly for numeric fields."
+
+    - title: "MOBI Output: The Ignore margins setting no longer ignores blockquotes, only margins set via CSS on other elements."
+      tickets: [758675]
+
+    - title: "Fix regression that caused clicking auto send to also change the email address in Preferences->Email"
+
+  improved recipes:
+    - Wall Street Journal
+    - Weblogs SL
+    - Tabu.ro
+    - Vecernje Novosti
+
+  new recipes:
+    - title: Hallo Assen and Dvhn
+      author: Reijendert
+
+- version: 0.7.54
+  date: 2011-04-08
+
+  new features:
+    - title: "New output format, HTMLZ which is a single HTML file with its associated images/stylesheets in a zipped up file"
+      description: "Useful when you want to convert your ebook into a single HTML file for easy editing. Note that this output plugin is still new and needs testing"
+
+    - title: "When dealing with ZIP/RAR archives, use the file header rather than the file extension to determine the file type, when possible. This fixes the common case of CBZ files being actually cbr files and vice versa"
+
+    - title: "Support for the Motorola Atrix"
+
+    - title: "Allow the icons in the toolbar to be turned off completely via Preferences->Look & Feel"
+
+    - title: "When downloading metadata use the gzip transfer encoding when possible for a speedup."
+      tickets: [749304]
+
+  bug fixes:
+    - title: "Conversion pipeline: Workaround for bug in lxml that causes a massive mem leak on windows and OS X when the input document contains non ASCII CSS selectors."
+      tickets: [754555]
+
+    - title: "Conversion pipeline: Handle inline
+
+
+"""
+        for title, scalars, collections in self.get_namespaces():
+            yield """
+<h1>%s</h1>
+
+<table>
+    <tbody>
+""" % title
+            for i, (key, value) in enumerate(scalars):
+                colnum = i % 3
+                if colnum == 0: yield """
+        <tr>"""
+                yield """
+            <th>%(key)s</th><td>%(value)s</td>""" % vars()
+                if colnum == 2: yield """
+        </tr>"""
+
+            if colnum == 0: yield """
+            <th></th><td></td>
+            <th></th><td></td>
+        </tr>"""
+            elif colnum == 1: yield """
+            <th></th><td></td>
+        </tr>"""
+            yield """
+    </tbody>
+</table>
+"""
+
+            for subtitle, headers, subrows in collections:
+                yield """
+<h2>%s</h2>
+
+<table>
+    <thead>
+        <tr>""" % subtitle
+                for key in headers:
+                    yield """
+            <th>%s</th>""" % key
+                yield """
+        </tr>
+    </thead>
+    <tbody>"""
+                for subrow in subrows:
+                    yield """
+        <tr>"""
+                    for value in subrow:
+                        yield """
+            <td>%s</td>""" % value
+                    yield """
+        </tr>"""
+                yield """
+    </tbody>
    """ + yield """ + + +""" + index.exposed = True + + def get_namespaces(self): + """Yield (title, scalars, collections) for each namespace.""" + s = extrapolate_statistics(logging.statistics) + for title, ns in sorted(s.items()): + scalars = [] + collections = [] + ns_fmt = self.formatting.get(title, {}) + for k, v in sorted(ns.items()): + fmt = ns_fmt.get(k, {}) + if isinstance(v, dict): + headers, subrows = self.get_dict_collection(v, fmt) + collections.append((k, ['ID'] + headers, subrows)) + elif isinstance(v, (list, tuple)): + headers, subrows = self.get_list_collection(v, fmt) + collections.append((k, headers, subrows)) + else: + format = ns_fmt.get(k, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v = format(v) + elif format is not missing: + v = format % v + scalars.append((k, v)) + yield title, scalars, collections + + def get_dict_collection(self, v, formatting): + """Return ([headers], [rows]) for the given collection.""" + # E.g., the 'Requests' dict. + headers = [] + for record in v.itervalues(): + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for k2, record in sorted(v.items()): + subrow = [k2] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + def get_list_collection(self, v, formatting): + """Return ([headers], [subrows]) for the given collection.""" + # E.g., the 'Slow Queries' list. + headers = [] + for record in v: + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for record in v: + subrow = [] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. 
+ continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + if json is not None: + def data(self): + s = extrapolate_statistics(logging.statistics) + cherrypy.response.headers['Content-Type'] = 'application/json' + return json.dumps(s, sort_keys=True, indent=4) + data.exposed = True + + def pause(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = False + raise cherrypy.HTTPRedirect('./') + pause.exposed = True + pause.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} + + def resume(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = True + raise cherrypy.HTTPRedirect('./') + resume.exposed = True + resume.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} + diff --git a/src/cherrypy/lib/cptools.py b/src/cherrypy/lib/cptools.py index b54019cb81..b426a3e784 100644 --- a/src/cherrypy/lib/cptools.py +++ b/src/cherrypy/lib/cptools.py @@ -1,22 +1,16 @@ """Functions for builtin CherryPy tools.""" import logging - -try: - # Python 2.5+ - from hashlib import md5 -except ImportError: - from md5 import new as md5 - import re import cherrypy -from cherrypy.lib import http as _http +from cherrypy._cpcompat import basestring, ntob, md5, set +from cherrypy.lib import httputil as _httputil # Conditional HTTP request support # -def validate_etags(autotags=False): +def validate_etags(autotags=False, debug=False): """Validate the current ETag against If-Match, If-None-Match headers. If autotags is True, an ETag response-header value will be provided @@ -30,42 +24,62 @@ def validate_etags(autotags=False): use for entity tags in a possibly destructive fashion. Likewise, if you raise 304 Not Modified, the response body will be empty, the ETag hash will be incorrect, and your application will break. - See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24 + See :rfc:`2616` Section 14.24. """ - response = cherrypy.response + response = cherrypy.serving.response # Guard against being run twice. if hasattr(response, "ETag"): return - status, reason, msg = _http.valid_status(response.status) + status, reason, msg = _httputil.valid_status(response.status) etag = response.headers.get('ETag') # Automatic ETag generation. See warning in docstring. - if (not etag) and autotags: - if status == 200: - etag = response.collapse_body() - etag = '"%s"' % md5(etag).hexdigest() - response.headers['ETag'] = etag + if etag: + if debug: + cherrypy.log('ETag already set: %s' % etag, 'TOOLS.ETAGS') + elif not autotags: + if debug: + cherrypy.log('Autotags off', 'TOOLS.ETAGS') + elif status != 200: + if debug: + cherrypy.log('Status not 200', 'TOOLS.ETAGS') + else: + etag = response.collapse_body() + etag = '"%s"' % md5(etag).hexdigest() + if debug: + cherrypy.log('Setting ETag: %s' % etag, 'TOOLS.ETAGS') + response.headers['ETag'] = etag response.ETag = etag # "If the request would, without the If-Match header field, result in # anything other than a 2xx or 412 status, then the If-Match header # MUST be ignored." 
+ if debug: + cherrypy.log('Status: %s' % status, 'TOOLS.ETAGS') if status >= 200 and status <= 299: - request = cherrypy.request + request = cherrypy.serving.request conditions = request.headers.elements('If-Match') or [] conditions = [str(x) for x in conditions] + if debug: + cherrypy.log('If-Match conditions: %s' % repr(conditions), + 'TOOLS.ETAGS') if conditions and not (conditions == ["*"] or etag in conditions): raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did " "not match %r" % (etag, conditions)) conditions = request.headers.elements('If-None-Match') or [] conditions = [str(x) for x in conditions] + if debug: + cherrypy.log('If-None-Match conditions: %s' % repr(conditions), + 'TOOLS.ETAGS') if conditions == ["*"] or etag in conditions: + if debug: + cherrypy.log('request.method: %s' % request.method, 'TOOLS.ETAGS') if request.method in ("GET", "HEAD"): raise cherrypy.HTTPRedirect([], 304) else: @@ -78,12 +92,12 @@ def validate_since(): If no code has set the Last-Modified response header, then no validation will be performed. """ - response = cherrypy.response + response = cherrypy.serving.response lastmod = response.headers.get('Last-Modified') if lastmod: - status, reason, msg = _http.valid_status(response.status) + status, reason, msg = _httputil.valid_status(response.status) - request = cherrypy.request + request = cherrypy.serving.request since = request.headers.get('If-Unmodified-Since') if since and since != lastmod: @@ -101,16 +115,51 @@ def validate_since(): # Tool code # +def allow(methods=None, debug=False): + """Raise 405 if request.method not in methods (default ['GET', 'HEAD']). + + The given methods are case-insensitive, and may be in any order. + If only one method is allowed, you may supply a single string; + if more than one, supply a list of strings. + + Regardless of whether the current method is allowed or not, this + also emits an 'Allow' response header, containing the given methods. + """ + if not isinstance(methods, (tuple, list)): + methods = [methods] + methods = [m.upper() for m in methods if m] + if not methods: + methods = ['GET', 'HEAD'] + elif 'GET' in methods and 'HEAD' not in methods: + methods.append('HEAD') + + cherrypy.response.headers['Allow'] = ', '.join(methods) + if cherrypy.request.method not in methods: + if debug: + cherrypy.log('request.method %r not in methods %r' % + (cherrypy.request.method, methods), 'TOOLS.ALLOW') + raise cherrypy.HTTPError(405) + else: + if debug: + cherrypy.log('request.method %r in methods %r' % + (cherrypy.request.method, methods), 'TOOLS.ALLOW') + + def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', - scheme='X-Forwarded-Proto'): + scheme='X-Forwarded-Proto', debug=False): """Change the base URL (scheme://host[:port][/path]). For running a CP server behind Apache, lighttpd, or other HTTP server. + For Apache and lighttpd, you should leave the 'local' argument at the + default value of 'X-Forwarded-Host'. For Squid, you probably want to set + tools.proxy.local = 'Origin'. + If you want the new request.base to include path info (not just the host), you must explicitly set base to the full base path, and ALSO set 'local' to '', so that the X-Forwarded-Host request header (which never includes - path info) does not override it. + path info) does not override it. Regardless, the value for 'base' MUST + NOT end in a slash. cherrypy.request.remote.ip (the IP address of the client) will be rewritten if the header specified by the 'remote' arg is valid. 
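For reference, the proxy tool being patched here is normally enabled through CherryPy config rather than called directly. A minimal sketch follows; the host name and app layout are illustrative, not part of this patch:

    import cherrypy

    class Root(object):
        def index(self):
            # With tools.proxy on, cherrypy.url() and request.base reflect the
            # scheme/host supplied by the front end's X-Forwarded-* headers.
            return cherrypy.url()
        index.exposed = True

    cherrypy.config.update({
        'tools.proxy.on': True,
        # Fallback base, used only when the X-Forwarded-Host header is absent.
        'tools.proxy.base': 'http://www.example.com',
    })
    cherrypy.quickstart(Root())

Behind Apache or lighttpd the default local='X-Forwarded-Host' is usually sufficient; the patched docstring notes that Squid users probably want tools.proxy.local = 'Origin'.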
@@ -118,10 +167,12 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', want to rewrite remote.ip, set the 'remote' arg to an empty string. """ - request = cherrypy.request + request = cherrypy.serving.request if scheme: s = request.headers.get(scheme, None) + if debug: + cherrypy.log('Testing scheme %r:%r' % (scheme, s), 'TOOLS.PROXY') if s == 'on' and 'ssl' in scheme.lower(): # This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header scheme = 'https' @@ -132,9 +183,13 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', scheme = request.base[:request.base.find("://")] if local: - base = request.headers.get(local, base) + lbase = request.headers.get(local, None) + if debug: + cherrypy.log('Testing local %r:%r' % (local, lbase), 'TOOLS.PROXY') + if lbase is not None: + base = lbase.split(',')[0] if not base: - port = cherrypy.request.local.port + port = request.local.port if port == 80: base = '127.0.0.1' else: @@ -148,6 +203,8 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', if remote: xff = request.headers.get(remote) + if debug: + cherrypy.log('Testing remote %r:%r' % (remote, xff), 'TOOLS.PROXY') if xff: if remote == 'X-Forwarded-For': # See http://bob.pythonmac.org/archives/2005/09/23/apache-x-forwarded-for-caveat/ @@ -155,42 +212,64 @@ def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', request.remote.ip = xff -def ignore_headers(headers=('Range',)): +def ignore_headers(headers=('Range',), debug=False): """Delete request headers whose field names are included in 'headers'. This is a useful tool for working behind certain HTTP servers; for example, Apache duplicates the work that CP does for 'Range' headers, and will doubly-truncate the response. """ - request = cherrypy.request + request = cherrypy.serving.request for name in headers: if name in request.headers: + if debug: + cherrypy.log('Ignoring request header %r' % name, + 'TOOLS.IGNORE_HEADERS') del request.headers[name] -def response_headers(headers=None): +def response_headers(headers=None, debug=False): """Set headers on the response.""" + if debug: + cherrypy.log('Setting response headers: %s' % repr(headers), + 'TOOLS.RESPONSE_HEADERS') for name, value in (headers or []): - cherrypy.response.headers[name] = value + cherrypy.serving.response.headers[name] = value response_headers.failsafe = True def referer(pattern, accept=True, accept_missing=False, error=403, - message='Forbidden Referer header.'): + message='Forbidden Referer header.', debug=False): """Raise HTTPError if Referer header does/does not match the given pattern. - pattern: a regular expression pattern to test against the Referer. - accept: if True, the Referer must match the pattern; if False, + pattern + A regular expression pattern to test against the Referer. + + accept + If True, the Referer must match the pattern; if False, the Referer must NOT match the pattern. - accept_missing: if True, permit requests with no Referer header. - error: the HTTP error code to return to the client on failure. - message: a string to include in the response body on failure. + + accept_missing + If True, permit requests with no Referer header. + + error + The HTTP error code to return to the client on failure. + + message + A string to include in the response body on failure. 
+ """ try: - match = bool(re.match(pattern, cherrypy.request.headers['Referer'])) + ref = cherrypy.serving.request.headers['Referer'] + match = bool(re.match(pattern, ref)) + if debug: + cherrypy.log('Referer %r matches %r' % (ref, pattern), + 'TOOLS.REFERER') if accept == match: return except KeyError: + if debug: + cherrypy.log('No Referer header', 'TOOLS.REFERER') if accept_missing: return @@ -201,6 +280,7 @@ class SessionAuth(object): """Assert that the user is logged in.""" session_key = "username" + debug = False def check_username_and_password(self, username, password): pass @@ -219,7 +299,7 @@ class SessionAuth(object): pass def login_screen(self, from_page='..', username='', error_msg='', **kwargs): - return """ + return ntob(""" Message: %(error_msg)s
 <form method="post" action="do_login">
     Login: <input type="text" name="username" value="%(username)s" size="10" /><br />
    @@ -228,20 +308,22 @@ Message: %(error_msg)s
    """ % {'from_page': from_page, 'username': username, - 'error_msg': error_msg} + 'error_msg': error_msg}, "utf-8") def do_login(self, username, password, from_page='..', **kwargs): """Login. May raise redirect, or return True if request handled.""" + response = cherrypy.serving.response error_msg = self.check_username_and_password(username, password) if error_msg: body = self.login_screen(from_page, username, error_msg) - cherrypy.response.body = body - if cherrypy.response.headers.has_key("Content-Length"): + response.body = body + if "Content-Length" in response.headers: # Delete Content-Length header so finalize() recalcs it. - del cherrypy.response.headers["Content-Length"] + del response.headers["Content-Length"] return True else: - cherrypy.session[self.session_key] = cherrypy.request.login = username + cherrypy.serving.request.login = username + cherrypy.session[self.session_key] = username self.on_login(username) raise cherrypy.HTTPRedirect(from_page or "/") @@ -251,43 +333,70 @@ Message: %(error_msg)s username = sess.get(self.session_key) sess[self.session_key] = None if username: - cherrypy.request.login = None + cherrypy.serving.request.login = None self.on_logout(username) raise cherrypy.HTTPRedirect(from_page) def do_check(self): """Assert username. May raise redirect, or return True if request handled.""" sess = cherrypy.session - request = cherrypy.request + request = cherrypy.serving.request + response = cherrypy.serving.response username = sess.get(self.session_key) if not username: sess[self.session_key] = username = self.anonymous() + if self.debug: + cherrypy.log('No session[username], trying anonymous', 'TOOLS.SESSAUTH') if not username: - cherrypy.response.body = self.login_screen(cherrypy.url(qs=request.query_string)) - if cherrypy.response.headers.has_key("Content-Length"): + url = cherrypy.url(qs=request.query_string) + if self.debug: + cherrypy.log('No username, routing to login_screen with ' + 'from_page %r' % url, 'TOOLS.SESSAUTH') + response.body = self.login_screen(url) + if "Content-Length" in response.headers: # Delete Content-Length header so finalize() recalcs it. 
- del cherrypy.response.headers["Content-Length"] + del response.headers["Content-Length"] return True - cherrypy.request.login = username + if self.debug: + cherrypy.log('Setting request.login to %r' % username, 'TOOLS.SESSAUTH') + request.login = username self.on_check(username) def run(self): - request = cherrypy.request + request = cherrypy.serving.request + response = cherrypy.serving.response + path = request.path_info if path.endswith('login_screen'): + if self.debug: + cherrypy.log('routing %r to login_screen' % path, 'TOOLS.SESSAUTH') return self.login_screen(**request.params) elif path.endswith('do_login'): + if request.method != 'POST': + response.headers['Allow'] = "POST" + if self.debug: + cherrypy.log('do_login requires POST', 'TOOLS.SESSAUTH') + raise cherrypy.HTTPError(405) + if self.debug: + cherrypy.log('routing %r to do_login' % path, 'TOOLS.SESSAUTH') return self.do_login(**request.params) elif path.endswith('do_logout'): + if request.method != 'POST': + response.headers['Allow'] = "POST" + raise cherrypy.HTTPError(405) + if self.debug: + cherrypy.log('routing %r to do_logout' % path, 'TOOLS.SESSAUTH') return self.do_logout(**request.params) else: + if self.debug: + cherrypy.log('No special path, running do_check', 'TOOLS.SESSAUTH') return self.do_check() def session_auth(**kwargs): sa = SessionAuth() - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(sa, k, v) return sa.run() session_auth.__doc__ = """Session authentication hook. @@ -299,59 +408,69 @@ to this function: for k in dir(SessionAuth) if not k.startswith("__")]) -def log_traceback(severity=logging.DEBUG): +def log_traceback(severity=logging.ERROR, debug=False): """Write the last error's traceback to the cherrypy error log.""" cherrypy.log("", "HTTP", severity=severity, traceback=True) -def log_request_headers(): +def log_request_headers(debug=False): """Write request headers to the cherrypy error log.""" - h = [" %s: %s" % (k, v) for k, v in cherrypy.request.header_list] + h = [" %s: %s" % (k, v) for k, v in cherrypy.serving.request.header_list] cherrypy.log('\nRequest Headers:\n' + '\n'.join(h), "HTTP") -def log_hooks(): +def log_hooks(debug=False): """Write request.hooks to the cherrypy error log.""" + request = cherrypy.serving.request + msg = [] # Sort by the standard points if possible. 
from cherrypy import _cprequest points = _cprequest.hookpoints - for k in cherrypy.request.hooks.keys(): + for k in request.hooks.keys(): if k not in points: points.append(k) for k in points: msg.append(" %s:" % k) - v = cherrypy.request.hooks.get(k, []) + v = request.hooks.get(k, []) v.sort() for h in v: msg.append(" %r" % h) cherrypy.log('\nRequest Hooks for ' + cherrypy.url() + ':\n' + '\n'.join(msg), "HTTP") -def redirect(url='', internal=True): +def redirect(url='', internal=True, debug=False): """Raise InternalRedirect or HTTPRedirect to the given url.""" + if debug: + cherrypy.log('Redirecting %sto: %s' % + ({True: 'internal ', False: ''}[internal], url), + 'TOOLS.REDIRECT') if internal: raise cherrypy.InternalRedirect(url) else: raise cherrypy.HTTPRedirect(url) -def trailing_slash(missing=True, extra=False): +def trailing_slash(missing=True, extra=False, status=None, debug=False): """Redirect if path_info has (missing|extra) trailing slash.""" - request = cherrypy.request + request = cherrypy.serving.request pi = request.path_info + if debug: + cherrypy.log('is_index: %r, missing: %r, extra: %r, path_info: %r' % + (request.is_index, missing, extra, pi), + 'TOOLS.TRAILING_SLASH') if request.is_index is True: if missing: if not pi.endswith('/'): new_url = cherrypy.url(pi + '/', request.query_string) - raise cherrypy.HTTPRedirect(new_url) + raise cherrypy.HTTPRedirect(new_url, status=status or 301) elif request.is_index is False: if extra: # If pi == '/', don't redirect to ''! if pi.endswith('/') and pi != '/': new_url = cherrypy.url(pi[:-1], request.query_string) - raise cherrypy.HTTPRedirect(new_url) + raise cherrypy.HTTPRedirect(new_url, status=status or 301) -def flatten(): +def flatten(debug=False): """Wrap response.body in a generator that recursively iterates over body. This allows cherrypy.response.body to consist of 'nested generators'; @@ -359,23 +478,28 @@ def flatten(): """ import types def flattener(input): + numchunks = 0 for x in input: if not isinstance(x, types.GeneratorType): + numchunks += 1 yield x else: for y in flattener(x): - yield y - response = cherrypy.response + numchunks += 1 + yield y + if debug: + cherrypy.log('Flattened %d chunks' % numchunks, 'TOOLS.FLATTEN') + response = cherrypy.serving.response response.body = flattener(response.body) -def accept(media=None): +def accept(media=None, debug=False): """Return the client's preferred media-type (from the given Content-Types). If 'media' is None (the default), no test will be performed. If 'media' is provided, it should be the Content-Type value (as a string) - or values (as a list or tuple of strings) which the current request + or values (as a list or tuple of strings) which the current resource can emit. The client's acceptable media ranges (as declared in the Accept request header) will be matched in order to these Content-Type values; the first such string is returned. That is, the return value @@ -397,12 +521,15 @@ def accept(media=None): return if isinstance(media, basestring): media = [media] + request = cherrypy.serving.request # Parse the Accept request header, and try to match one # of the requested media-ranges (in order of preference). - ranges = cherrypy.request.headers.elements('Accept') + ranges = request.headers.elements('Accept') if not ranges: # Any media type is acceptable. 
+ if debug: + cherrypy.log('No Accept header elements', 'TOOLS.ACCEPT') return media[0] else: # Note that 'ranges' is sorted in order of preference @@ -410,20 +537,28 @@ def accept(media=None): if element.qvalue > 0: if element.value == "*/*": # Matches any type or subtype + if debug: + cherrypy.log('Match due to */*', 'TOOLS.ACCEPT') return media[0] elif element.value.endswith("/*"): # Matches any subtype mtype = element.value[:-1] # Keep the slash for m in media: if m.startswith(mtype): + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') return m else: # Matches exact value if element.value in media: + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') return element.value # No suitable media-range found. - ah = cherrypy.request.headers.get('Accept') + ah = request.headers.get('Accept') if ah is None: msg = "Your client did not send an Accept header." else: @@ -432,3 +567,51 @@ def accept(media=None): ", ".join(media)) raise cherrypy.HTTPError(406, msg) + +class MonitoredHeaderMap(_httputil.HeaderMap): + + def __init__(self): + self.accessed_headers = set() + + def __getitem__(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.__getitem__(self, key) + + def __contains__(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.__contains__(self, key) + + def get(self, key, default=None): + self.accessed_headers.add(key) + return _httputil.HeaderMap.get(self, key, default=default) + + if hasattr({}, 'has_key'): + # Python 2 + def has_key(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.has_key(self, key) + + +def autovary(ignore=None, debug=False): + """Auto-populate the Vary response header based on request.header access.""" + request = cherrypy.serving.request + + req_h = request.headers + request.headers = MonitoredHeaderMap() + request.headers.update(req_h) + if ignore is None: + ignore = set(['Content-Disposition', 'Content-Length', 'Content-Type']) + + def set_response_header(): + resp_h = cherrypy.serving.response.headers + v = set([e.value for e in resp_h.elements('Vary')]) + if debug: + cherrypy.log('Accessed headers: %s' % request.headers.accessed_headers, + 'TOOLS.AUTOVARY') + v = v.union(request.headers.accessed_headers) + v = v.difference(ignore) + v = list(v) + v.sort() + resp_h['Vary'] = ', '.join(v) + request.hooks.attach('before_finalize', set_response_header, 95) + diff --git a/src/cherrypy/lib/encoding.py b/src/cherrypy/lib/encoding.py index 94dc908e08..6459746509 100644 --- a/src/cherrypy/lib/encoding.py +++ b/src/cherrypy/lib/encoding.py @@ -2,173 +2,237 @@ import struct import time import cherrypy +from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr +from cherrypy.lib import file_generator +from cherrypy.lib import set_vary_header def decode(encoding=None, default_encoding='utf-8'): - """Decode cherrypy.request.params from str to unicode objects.""" - if not encoding: - ct = cherrypy.request.headers.elements("Content-Type") + """Replace or extend the list of charsets used to decode a request entity. + + Either argument may be a single string or a list of strings. + + encoding + If not None, restricts the set of charsets attempted while decoding + a request entity to the given set (even if a different charset is given in + the Content-Type request header). + + default_encoding + Only in effect if the 'encoding' argument is not given. 
+ If given, the set of charsets attempted while decoding a request entity is + *extended* with the given value(s). + + """ + body = cherrypy.request.body + if encoding is not None: + if not isinstance(encoding, list): + encoding = [encoding] + body.attempt_charsets = encoding + elif default_encoding: + if not isinstance(default_encoding, list): + default_encoding = [default_encoding] + body.attempt_charsets = body.attempt_charsets + default_encoding + + +class ResponseEncoder: + + default_encoding = 'utf-8' + failmsg = "Response body could not be encoded with %r." + encoding = None + errors = 'strict' + text_only = True + add_charset = True + debug = False + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + self.attempted_charsets = set() + request = cherrypy.serving.request + if request.handler is not None: + # Replace request.handler with self + if self.debug: + cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE') + self.oldhandler = request.handler + request.handler = self + + def encode_stream(self, encoding): + """Encode a streaming response body. + + Use a generator wrapper, and just pray it works as the stream is + being written out. + """ + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + + def encoder(body): + for chunk in body: + if isinstance(chunk, unicodestr): + chunk = chunk.encode(encoding, self.errors) + yield chunk + self.body = encoder(self.body) + return True + + def encode_string(self, encoding): + """Encode a buffered response body.""" + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + + try: + body = [] + for chunk in self.body: + if isinstance(chunk, unicodestr): + chunk = chunk.encode(encoding, self.errors) + body.append(chunk) + self.body = body + except (LookupError, UnicodeError): + return False + else: + return True + + def find_acceptable_charset(self): + request = cherrypy.serving.request + response = cherrypy.serving.response + + if self.debug: + cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE') + if response.stream: + encoder = self.encode_stream + else: + encoder = self.encode_string + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + # Encoded strings may be of different lengths from their + # unicode equivalents, and even from each other. For example: + # >>> t = u"\u7007\u3040" + # >>> len(t) + # 2 + # >>> len(t.encode("UTF-8")) + # 6 + # >>> len(t.encode("utf7")) + # 8 + del response.headers["Content-Length"] + + # Parse the Accept-Charset request header, and try to provide one + # of the requested charsets (in order of user preference). + encs = request.headers.elements('Accept-Charset') + charsets = [enc.value.lower() for enc in encs] + if self.debug: + cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE') + + if self.encoding is not None: + # If specified, force this encoding to be used, or fail. + encoding = self.encoding.lower() + if self.debug: + cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE') + if (not charsets) or "*" in charsets or encoding in charsets: + if self.debug: + cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + else: + if not encs: + if self.debug: + cherrypy.log('Attempting default encoding %r' % + self.default_encoding, 'TOOLS.ENCODE') + # Any character-set is acceptable. 
+ if encoder(self.default_encoding): + return self.default_encoding + else: + raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding) + else: + for element in encs: + if element.qvalue > 0: + if element.value == "*": + # Matches any charset. Try our default. + if self.debug: + cherrypy.log('Attempting default encoding due ' + 'to %r' % element, 'TOOLS.ENCODE') + if encoder(self.default_encoding): + return self.default_encoding + else: + encoding = element.value + if self.debug: + cherrypy.log('Attempting encoding %s (qvalue >' + '0)' % element, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + + if "*" not in charsets: + # If no "*" is present in an Accept-Charset field, then all + # character sets not explicitly mentioned get a quality + # value of 0, except for ISO-8859-1, which gets a quality + # value of 1 if not explicitly mentioned. + iso = 'iso-8859-1' + if iso not in charsets: + if self.debug: + cherrypy.log('Attempting ISO-8859-1 encoding', + 'TOOLS.ENCODE') + if encoder(iso): + return iso + + # No suitable encoding found. + ac = request.headers.get('Accept-Charset') + if ac is None: + msg = "Your client did not send an Accept-Charset header." + else: + msg = "Your client sent this Accept-Charset header: %s." % ac + msg += " We tried these charsets: %s." % ", ".join(self.attempted_charsets) + raise cherrypy.HTTPError(406, msg) + + def __call__(self, *args, **kwargs): + response = cherrypy.serving.response + self.body = self.oldhandler(*args, **kwargs) + + if isinstance(self.body, basestring): + # strings get wrapped in a list because iterating over a single + # item list is much faster than iterating over every character + # in a long string. + if self.body: + self.body = [self.body] + else: + # [''] doesn't evaluate to False, so replace it with []. + self.body = [] + elif hasattr(self.body, 'read'): + self.body = file_generator(self.body) + elif self.body is None: + self.body = [] + + ct = response.headers.elements("Content-Type") + if self.debug: + cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE') if ct: ct = ct[0] - encoding = ct.params.get("charset", None) - if (not encoding) and ct.value.lower().startswith("text/"): - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 - # When no explicit charset parameter is provided by the - # sender, media subtypes of the "text" type are defined - # to have a default charset value of "ISO-8859-1" when - # received via HTTP. - encoding = "ISO-8859-1" - - if not encoding: - encoding = default_encoding - - try: - decode_params(encoding) - except UnicodeDecodeError: - # IE and Firefox don't supply a charset when submitting form - # params with a CT of application/x-www-form-urlencoded. - # So after all our guessing, it could *still* be wrong. - # Start over with ISO-8859-1, since that seems to be preferred. - decode_params("ISO-8859-1") - -def decode_params(encoding): - decoded_params = {} - for key, value in cherrypy.request.params.items(): - if not hasattr(value, 'file'): - # Skip the value if it is an uploaded file - if isinstance(value, list): - # value is a list: decode each element - value = [v.decode(encoding) for v in value] - elif isinstance(value, str): - # value is a regular string: decode it - value = value.decode(encoding) - decoded_params[key] = value - - # Decode all or nothing, so we can try again on error. 
- cherrypy.request.params = decoded_params - - -# Encoding - -def encode(encoding=None, errors='strict', text_only=True, add_charset=True): - # Guard against running twice - if getattr(cherrypy.request, "_encoding_attempted", False): - return - cherrypy.request._encoding_attempted = True - - ct = cherrypy.response.headers.elements("Content-Type") - if ct: - ct = ct[0] - if (not text_only) or ct.value.lower().startswith("text/"): - # Set "charset=..." param on response Content-Type header - ct.params['charset'] = find_acceptable_charset(encoding, errors=errors) - if add_charset: - cherrypy.response.headers["Content-Type"] = str(ct) - -def encode_stream(encoding, errors='strict'): - """Encode a streaming response body. - - Use a generator wrapper, and just pray it works as the stream is - being written out. - """ - def encoder(body): - for chunk in body: - if isinstance(chunk, unicode): - chunk = chunk.encode(encoding, errors) - yield chunk - cherrypy.response.body = encoder(cherrypy.response.body) - return True - -def encode_string(encoding, errors='strict'): - """Encode a buffered response body.""" - try: - body = [] - for chunk in cherrypy.response.body: - if isinstance(chunk, unicode): - chunk = chunk.encode(encoding, errors) - body.append(chunk) - cherrypy.response.body = body - except (LookupError, UnicodeError): - return False - else: - return True - -def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'): - response = cherrypy.response - - if cherrypy.response.stream: - encoder = encode_stream - else: - response.collapse_body() - encoder = encode_string - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - # Encoded strings may be of different lengths from their - # unicode equivalents, and even from each other. For example: - # >>> t = u"\u7007\u3040" - # >>> len(t) - # 2 - # >>> len(t.encode("UTF-8")) - # 6 - # >>> len(t.encode("utf7")) - # 8 - del response.headers["Content-Length"] - - # Parse the Accept-Charset request header, and try to provide one - # of the requested charsets (in order of user preference). - encs = cherrypy.request.headers.elements('Accept-Charset') - charsets = [enc.value.lower() for enc in encs] - attempted_charsets = [] - - if encoding is not None: - # If specified, force this encoding to be used, or fail. - encoding = encoding.lower() - if (not charsets) or "*" in charsets or encoding in charsets: - if encoder(encoding, errors): - return encoding - else: - if not encs: - # Any character-set is acceptable. - if encoder(default_encoding, errors): - return default_encoding + if self.text_only: + if ct.value.lower().startswith("text/"): + if self.debug: + cherrypy.log('Content-Type %s starts with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = True + else: + if self.debug: + cherrypy.log('Not finding because Content-Type %s does ' + 'not start with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = False else: - raise cherrypy.HTTPError(500, failmsg % default_encoding) - else: - if "*" not in charsets: - # If no "*" is present in an Accept-Charset field, then all - # character sets not explicitly mentioned get a quality - # value of 0, except for ISO-8859-1, which gets a quality - # value of 1 if not explicitly mentioned. 
- iso = 'iso-8859-1' - if iso not in charsets: - attempted_charsets.append(iso) - if encoder(iso, errors): - return iso + if self.debug: + cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE') + do_find = True - for element in encs: - if element.qvalue > 0: - if element.value == "*": - # Matches any charset. Try our default. - if default_encoding not in attempted_charsets: - attempted_charsets.append(default_encoding) - if encoder(default_encoding, errors): - return default_encoding - else: - encoding = element.value - if encoding not in attempted_charsets: - attempted_charsets.append(encoding) - if encoder(encoding, errors): - return encoding - - # No suitable encoding found. - ac = cherrypy.request.headers.get('Accept-Charset') - if ac is None: - msg = "Your client did not send an Accept-Charset header." - else: - msg = "Your client sent this Accept-Charset header: %s." % ac - msg += " We tried these charsets: %s." % ", ".join(attempted_charsets) - raise cherrypy.HTTPError(406, msg) - + if do_find: + # Set "charset=..." param on response Content-Type header + ct.params['charset'] = self.find_acceptable_charset() + if self.add_charset: + if self.debug: + cherrypy.log('Setting Content-Type %s' % ct, + 'TOOLS.ENCODE') + response.headers["Content-Type"] = str(ct) + + return self.body # GZIP @@ -176,14 +240,16 @@ def compress(body, compress_level): """Compress 'body' at the given compress_level.""" import zlib - yield '\037\213' # magic header - yield '\010' # compression method - yield '\0' - yield struct.pack(" 0 is present * The 'identity' value is given with a qvalue > 0. + """ - response = cherrypy.response + request = cherrypy.serving.request + response = cherrypy.serving.response + + set_vary_header(response, "Accept-Encoding") + if not response.body: # Response body is empty (might be a 304 for instance) + if debug: + cherrypy.log('No response body', context='TOOLS.GZIP') return # If returning cached content (which should already have been gzipped), # don't re-zip. - if getattr(cherrypy.request, "cached", False): + if getattr(request, "cached", False): + if debug: + cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP') return - acceptable = cherrypy.request.headers.elements('Accept-Encoding') + acceptable = request.headers.elements('Accept-Encoding') if not acceptable: # If no Accept-Encoding field is present in a request, # the server MAY assume that the client will accept any @@ -239,27 +322,67 @@ def gzip(compress_level=9, mime_types=['text/html', 'text/plain']): # the "identity" content-coding, unless it has additional # information that a different content-coding is meaningful # to the client. 
+ if debug: + cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP') return ct = response.headers.get('Content-Type', '').split(';')[0] for coding in acceptable: if coding.value == 'identity' and coding.qvalue != 0: + if debug: + cherrypy.log('Non-zero identity qvalue: %s' % coding, + context='TOOLS.GZIP') return if coding.value in ('gzip', 'x-gzip'): if coding.qvalue == 0: + if debug: + cherrypy.log('Zero gzip qvalue: %s' % coding, + context='TOOLS.GZIP') return - if ct in mime_types: - # Return a generator that compresses the page - varies = response.headers.get("Vary", "") - varies = [x.strip() for x in varies.split(",") if x.strip()] - if "Accept-Encoding" not in varies: - varies.append("Accept-Encoding") - response.headers['Vary'] = ", ".join(varies) - - response.headers['Content-Encoding'] = 'gzip' - response.body = compress(response.body, compress_level) - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] + + if ct not in mime_types: + # If the list of provided mime-types contains tokens + # such as 'text/*' or 'application/*+xml', + # we go through them and find the most appropriate one + # based on the given content-type. + # The pattern matching is only caring about the most + # common cases, as stated above, and doesn't support + # for extra parameters. + found = False + if '/' in ct: + ct_media_type, ct_sub_type = ct.split('/') + for mime_type in mime_types: + if '/' in mime_type: + media_type, sub_type = mime_type.split('/') + if ct_media_type == media_type: + if sub_type == '*': + found = True + break + elif '+' in sub_type and '+' in ct_sub_type: + ct_left, ct_right = ct_sub_type.split('+') + left, right = sub_type.split('+') + if left == '*' and ct_right == right: + found = True + break + + if not found: + if debug: + cherrypy.log('Content-Type %s not in mime_types %r' % + (ct, mime_types), context='TOOLS.GZIP') + return + + if debug: + cherrypy.log('Gzipping', context='TOOLS.GZIP') + # Return a generator that compresses the page + response.headers['Content-Encoding'] = 'gzip' + response.body = compress(response.body, compress_level) + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + return + + if debug: + cherrypy.log('No acceptable encoding found.', context='GZIP') cherrypy.HTTPError(406, "identity, gzip").set_response() + diff --git a/src/cherrypy/lib/gctools.py b/src/cherrypy/lib/gctools.py new file mode 100644 index 0000000000..183148b212 --- /dev/null +++ b/src/cherrypy/lib/gctools.py @@ -0,0 +1,214 @@ +import gc +import inspect +import os +import sys +import time + +try: + import objgraph +except ImportError: + objgraph = None + +import cherrypy +from cherrypy import _cprequest, _cpwsgi +from cherrypy.process.plugins import SimplePlugin + + +class ReferrerTree(object): + """An object which gathers all referrers of an object to a given depth.""" + + peek_length = 40 + + def __init__(self, ignore=None, maxdepth=2, maxparents=10): + self.ignore = ignore or [] + self.ignore.append(inspect.currentframe().f_back) + self.maxdepth = maxdepth + self.maxparents = maxparents + + def ascend(self, obj, depth=1): + """Return a nested list containing referrers of the given object.""" + depth += 1 + parents = [] + + # Gather all referrers in one step to minimize + # cascading references due to repr() logic. 
+ refs = gc.get_referrers(obj) + self.ignore.append(refs) + if len(refs) > self.maxparents: + return [("[%s referrers]" % len(refs), [])] + + try: + ascendcode = self.ascend.__code__ + except AttributeError: + ascendcode = self.ascend.im_func.func_code + for parent in refs: + if inspect.isframe(parent) and parent.f_code is ascendcode: + continue + if parent in self.ignore: + continue + if depth <= self.maxdepth: + parents.append((parent, self.ascend(parent, depth))) + else: + parents.append((parent, [])) + + return parents + + def peek(self, s): + """Return s, restricted to a sane length.""" + if len(s) > (self.peek_length + 3): + half = self.peek_length // 2 + return s[:half] + '...' + s[-half:] + else: + return s + + def _format(self, obj, descend=True): + """Return a string representation of a single object.""" + if inspect.isframe(obj): + filename, lineno, func, context, index = inspect.getframeinfo(obj) + return "" % func + + if not descend: + return self.peek(repr(obj)) + + if isinstance(obj, dict): + return "{" + ", ".join(["%s: %s" % (self._format(k, descend=False), + self._format(v, descend=False)) + for k, v in obj.items()]) + "}" + elif isinstance(obj, list): + return "[" + ", ".join([self._format(item, descend=False) + for item in obj]) + "]" + elif isinstance(obj, tuple): + return "(" + ", ".join([self._format(item, descend=False) + for item in obj]) + ")" + + r = self.peek(repr(obj)) + if isinstance(obj, (str, int, float)): + return r + return "%s: %s" % (type(obj), r) + + def format(self, tree): + """Return a list of string reprs from a nested list of referrers.""" + output = [] + def ascend(branch, depth=1): + for parent, grandparents in branch: + output.append((" " * depth) + self._format(parent)) + if grandparents: + ascend(grandparents, depth + 1) + ascend(tree) + return output + + +def get_instances(cls): + return [x for x in gc.get_objects() if isinstance(x, cls)] + + +class RequestCounter(SimplePlugin): + + def start(self): + self.count = 0 + + def before_request(self): + self.count += 1 + + def after_request(self): + self.count -=1 +request_counter = RequestCounter(cherrypy.engine) +request_counter.subscribe() + + +def get_context(obj): + if isinstance(obj, _cprequest.Request): + return "path=%s;stage=%s" % (obj.path_info, obj.stage) + elif isinstance(obj, _cprequest.Response): + return "status=%s" % obj.status + elif isinstance(obj, _cpwsgi.AppResponse): + return "PATH_INFO=%s" % obj.environ.get('PATH_INFO', '') + elif hasattr(obj, "tb_lineno"): + return "tb_lineno=%s" % obj.tb_lineno + return "" + + +class GCRoot(object): + """A CherryPy page handler for testing reference leaks.""" + + classes = [(_cprequest.Request, 2, 2, + "Should be 1 in this request thread and 1 in the main thread."), + (_cprequest.Response, 2, 2, + "Should be 1 in this request thread and 1 in the main thread."), + (_cpwsgi.AppResponse, 1, 1, + "Should be 1 in this request thread only."), + ] + + def index(self): + return "Hello, world!" + index.exposed = True + + def stats(self): + output = ["Statistics:"] + + for trial in range(10): + if request_counter.count > 0: + break + time.sleep(0.5) + else: + output.append("\nNot all requests closed properly.") + + # gc_collect isn't perfectly synchronous, because it may + # break reference cycles that then take time to fully + # finalize. Call it thrice and hope for the best. 
+ gc.collect() + gc.collect() + unreachable = gc.collect() + if unreachable: + if objgraph is not None: + final = objgraph.by_type('Nondestructible') + if final: + objgraph.show_backrefs(final, filename='finalizers.png') + + trash = {} + for x in gc.garbage: + trash[type(x)] = trash.get(type(x), 0) + 1 + if trash: + output.insert(0, "\n%s unreachable objects:" % unreachable) + trash = [(v, k) for k, v in trash.items()] + trash.sort() + for pair in trash: + output.append(" " + repr(pair)) + + # Check declared classes to verify uncollected instances. + # These don't have to be part of a cycle; they can be + # any objects that have unanticipated referrers that keep + # them from being collected. + allobjs = {} + for cls, minobj, maxobj, msg in self.classes: + allobjs[cls] = get_instances(cls) + + for cls, minobj, maxobj, msg in self.classes: + objs = allobjs[cls] + lenobj = len(objs) + if lenobj < minobj or lenobj > maxobj: + if minobj == maxobj: + output.append( + "\nExpected %s %r references, got %s." % + (minobj, cls, lenobj)) + else: + output.append( + "\nExpected %s to %s %r references, got %s." % + (minobj, maxobj, cls, lenobj)) + + for obj in objs: + if objgraph is not None: + ig = [id(objs), id(inspect.currentframe())] + fname = "graph_%s_%s.png" % (cls.__name__, id(obj)) + objgraph.show_backrefs( + obj, extra_ignore=ig, max_depth=4, too_many=20, + filename=fname, extra_info=get_context) + output.append("\nReferrers for %s (refcount=%s):" % + (repr(obj), sys.getrefcount(obj))) + t = ReferrerTree(ignore=[objs], maxdepth=3) + tree = t.ascend(obj) + output.extend(t.format(tree)) + + return "\n".join(output) + stats.exposed = True + diff --git a/src/cherrypy/lib/http.py b/src/cherrypy/lib/http.py index 82dfa5bf80..4661d69e28 100644 --- a/src/cherrypy/lib/http.py +++ b/src/cherrypy/lib/http.py @@ -1,405 +1,7 @@ -"""HTTP library functions.""" +import warnings +warnings.warn('cherrypy.lib.http has been deprecated and will be removed ' + 'in CherryPy 3.3 use cherrypy.lib.httputil instead.', + DeprecationWarning) -# This module contains functions for building an HTTP application -# framework: any one, not just one whose name starts with "Ch". ;) If you -# reference any modules from some popular framework inside *this* module, -# FuManChu will personally hang you up by your thumbs and submit you -# to a public caning. +from cherrypy.lib.httputil import * -from BaseHTTPServer import BaseHTTPRequestHandler -response_codes = BaseHTTPRequestHandler.responses.copy() - -# From http://www.cherrypy.org/ticket/361 -response_codes[500] = ('Internal Server Error', - 'The server encountered an unexpected condition ' - 'which prevented it from fulfilling the request.') -response_codes[503] = ('Service Unavailable', - 'The server is currently unable to handle the ' - 'request due to a temporary overloading or ' - 'maintenance of the server.') - - -import cgi -import re -from rfc822 import formatdate as HTTPDate - - -def urljoin(*atoms): - """Return the given path *atoms, joined into a single URL. - - This will correctly join a SCRIPT_NAME and PATH_INFO into the - original URL, even if either atom is blank. - """ - url = "/".join([x for x in atoms if x]) - while "//" in url: - url = url.replace("//", "/") - # Special-case the final url of "", and return "/" instead. 
- return url or "/" - -def protocol_from_http(protocol_str): - """Return a protocol tuple from the given 'HTTP/x.y' string.""" - return int(protocol_str[5]), int(protocol_str[7]) - -def get_ranges(headervalue, content_length): - """Return a list of (start, stop) indices from a Range header, or None. - - Each (start, stop) tuple will be composed of two ints, which are suitable - for use in a slicing operation. That is, the header "Range: bytes=3-6", - if applied against a Python string, is requesting resource[3:7]. This - function will return the list [(3, 7)]. - - If this function returns an empty list, you should return HTTP 416. - """ - - if not headervalue: - return None - - result = [] - bytesunit, byteranges = headervalue.split("=", 1) - for brange in byteranges.split(","): - start, stop = [x.strip() for x in brange.split("-", 1)] - if start: - if not stop: - stop = content_length - 1 - start, stop = map(int, (start, stop)) - if start >= content_length: - # From rfc 2616 sec 14.16: - # "If the server receives a request (other than one - # including an If-Range request-header field) with an - # unsatisfiable Range request-header field (that is, - # all of whose byte-range-spec values have a first-byte-pos - # value greater than the current length of the selected - # resource), it SHOULD return a response code of 416 - # (Requested range not satisfiable)." - continue - if stop < start: - # From rfc 2616 sec 14.16: - # "If the server ignores a byte-range-spec because it - # is syntactically invalid, the server SHOULD treat - # the request as if the invalid Range header field - # did not exist. (Normally, this means return a 200 - # response containing the full entity)." - return None - result.append((start, stop + 1)) - else: - if not stop: - # See rfc quote above. - return None - # Negative subscript (last N bytes) - result.append((content_length - int(stop), content_length)) - - return result - - -class HeaderElement(object): - """An element (with parameters) from an HTTP header's element list.""" - - def __init__(self, value, params=None): - self.value = value - if params is None: - params = {} - self.params = params - - def __unicode__(self): - p = [";%s=%s" % (k, v) for k, v in self.params.iteritems()] - return u"%s%s" % (self.value, "".join(p)) - - def __str__(self): - return str(self.__unicode__()) - - def parse(elementstr): - """Transform 'token;key=val' to ('token', {'key': 'val'}).""" - # Split the element into a value and parameters. The 'value' may - # be of the form, "token=token", but we don't split that here. - atoms = [x.strip() for x in elementstr.split(";") if x.strip()] - initial_value = atoms.pop(0).strip() - params = {} - for atom in atoms: - atom = [x.strip() for x in atom.split("=", 1) if x.strip()] - key = atom.pop(0) - if atom: - val = atom[0] - else: - val = "" - params[key] = val - return initial_value, params - parse = staticmethod(parse) - - def from_str(cls, elementstr): - """Construct an instance from a string of the form 'token;key=val'.""" - ival, params = cls.parse(elementstr) - return cls(ival, params) - from_str = classmethod(from_str) - - -q_separator = re.compile(r'; *q *=') - -class AcceptElement(HeaderElement): - """An element (with parameters) from an Accept* header's element list. - - AcceptElement objects are comparable; the more-preferred object will be - "less than" the less-preferred object. 
They are also therefore sortable; - if you sort a list of AcceptElement objects, they will be listed in - priority order; the most preferred value will be first. Yes, it should - have been the other way around, but it's too late to fix now. - """ - - def from_str(cls, elementstr): - qvalue = None - # The first "q" parameter (if any) separates the initial - # media-range parameter(s) (if any) from the accept-params. - atoms = q_separator.split(elementstr, 1) - media_range = atoms.pop(0).strip() - if atoms: - # The qvalue for an Accept header can have extensions. The other - # headers cannot, but it's easier to parse them as if they did. - qvalue = HeaderElement.from_str(atoms[0].strip()) - - media_type, params = cls.parse(media_range) - if qvalue is not None: - params["q"] = qvalue - return cls(media_type, params) - from_str = classmethod(from_str) - - def qvalue(self): - val = self.params.get("q", "1") - if isinstance(val, HeaderElement): - val = val.value - return float(val) - qvalue = property(qvalue, doc="The qvalue, or priority, of this value.") - - def __cmp__(self, other): - diff = cmp(other.qvalue, self.qvalue) - if diff == 0: - diff = cmp(str(other), str(self)) - return diff - - -def header_elements(fieldname, fieldvalue): - """Return a HeaderElement list from a comma-separated header str.""" - - if not fieldvalue: - return None - headername = fieldname.lower() - - result = [] - for element in fieldvalue.split(","): - if headername.startswith("accept") or headername == 'te': - hv = AcceptElement.from_str(element) - else: - hv = HeaderElement.from_str(element) - result.append(hv) - - result.sort() - return result - -def decode_TEXT(value): - """Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").""" - from email.Header import decode_header - atoms = decode_header(value) - decodedvalue = "" - for atom, charset in atoms: - if charset is not None: - atom = atom.decode(charset) - decodedvalue += atom - return decodedvalue - -def valid_status(status): - """Return legal HTTP status Code, Reason-phrase and Message. - - The status arg must be an int, or a str that begins with an int. - - If status is an int, or a str and no reason-phrase is supplied, - a default reason-phrase will be provided. - """ - - if not status: - status = 200 - - status = str(status) - parts = status.split(" ", 1) - if len(parts) == 1: - # No reason supplied. - code, = parts - reason = None - else: - code, reason = parts - reason = reason.strip() - - try: - code = int(code) - except ValueError: - raise ValueError("Illegal response status from server " - "(%s is non-numeric)." % repr(code)) - - if code < 100 or code > 599: - raise ValueError("Illegal response status from server " - "(%s is out of range)." % repr(code)) - - if code not in response_codes: - # code is unknown but not illegal - default_reason, message = "", "" - else: - default_reason, message = response_codes[code] - - if reason is None: - reason = default_reason - - return code, reason, message - - -image_map_pattern = re.compile(r"[0-9]+,[0-9]+") - -def parse_query_string(query_string, keep_blank_values=True): - """Build a params dictionary from a query_string. - - Duplicate key/value pairs in the provided query_string will be - returned as {'key': [val1, val2, ...]}. Single key/values will - be returned as strings: {'key': 'value'}. - """ - if image_map_pattern.match(query_string): - # Server-side image map. Map the coords to 'x' and 'y' - # (like CGI::Request does). 
- pm = query_string.split(",") - pm = {'x': int(pm[0]), 'y': int(pm[1])} - else: - pm = cgi.parse_qs(query_string, keep_blank_values) - for key, val in pm.items(): - if len(val) == 1: - pm[key] = val[0] - return pm - -def params_from_CGI_form(form): - params = {} - for key in form.keys(): - value_list = form[key] - if isinstance(value_list, list): - params[key] = [] - for item in value_list: - if item.filename is not None: - value = item # It's a file upload - else: - value = item.value # It's a regular field - params[key].append(value) - else: - if value_list.filename is not None: - value = value_list # It's a file upload - else: - value = value_list.value # It's a regular field - params[key] = value - return params - - -class CaseInsensitiveDict(dict): - """A case-insensitive dict subclass. - - Each key is changed on entry to str(key).title(). - """ - - def __getitem__(self, key): - return dict.__getitem__(self, str(key).title()) - - def __setitem__(self, key, value): - dict.__setitem__(self, str(key).title(), value) - - def __delitem__(self, key): - dict.__delitem__(self, str(key).title()) - - def __contains__(self, key): - return dict.__contains__(self, str(key).title()) - - def get(self, key, default=None): - return dict.get(self, str(key).title(), default) - - def has_key(self, key): - return dict.has_key(self, str(key).title()) - - def update(self, E): - for k in E.keys(): - self[str(k).title()] = E[k] - - def fromkeys(cls, seq, value=None): - newdict = cls() - for k in seq: - newdict[str(k).title()] = value - return newdict - fromkeys = classmethod(fromkeys) - - def setdefault(self, key, x=None): - key = str(key).title() - try: - return self[key] - except KeyError: - self[key] = x - return x - - def pop(self, key, default): - return dict.pop(self, str(key).title(), default) - - -class HeaderMap(CaseInsensitiveDict): - """A dict subclass for HTTP request and response headers. - - Each key is changed on entry to str(key).title(). This allows headers - to be case-insensitive and avoid duplicates. - - Values are header values (decoded according to RFC 2047 if necessary). - """ - - def elements(self, key): - """Return a list of HeaderElements for the given header (or None).""" - key = str(key).title() - h = self.get(key) - if h is None: - return [] - return header_elements(key, h) - - def output(self, protocol=(1, 1)): - """Transform self into a list of (name, value) tuples.""" - header_list = [] - for key, v in self.iteritems(): - if isinstance(v, unicode): - # HTTP/1.0 says, "Words of *TEXT may contain octets - # from character sets other than US-ASCII." and - # "Recipients of header field TEXT containing octets - # outside the US-ASCII character set may assume that - # they represent ISO-8859-1 characters." - try: - v = v.encode("iso-8859-1") - except UnicodeEncodeError: - if protocol >= (1, 1): - # Encode RFC-2047 TEXT - # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?="). - from email.Header import Header - v = Header(v, 'utf-8').encode() - else: - raise - else: - # This coercion should not take any time at all - # if value is already of type "str". - v = str(v) - header_list.append((key, v)) - return header_list - - - -class Host(object): - """An internet address. - - name should be the client's host name. If not available (because no DNS - lookup is performed), the IP address should be used instead. 
- """ - - ip = "0.0.0.0" - port = 80 - name = "unknown.tld" - - def __init__(self, ip, port, name=None): - self.ip = ip - self.port = port - if name is None: - name = ip - self.name = name - - def __repr__(self): - return "http.Host(%r, %r, %r)" % (self.ip, self.port, self.name) diff --git a/src/cherrypy/lib/httpauth.py b/src/cherrypy/lib/httpauth.py index f5d87d2b43..be87a785de 100644 --- a/src/cherrypy/lib/httpauth.py +++ b/src/cherrypy/lib/httpauth.py @@ -1,10 +1,9 @@ """ -httpauth modules defines functions to implement HTTP Digest Authentication (RFC 2617). +This module defines functions to implement HTTP Digest Authentication (:rfc:`2617`). This has full compliance with 'Digest' and 'Basic' authentication methods. In 'Digest' it supports both MD5 and MD5-sess algorithms. Usage: - First use 'doAuth' to request the client authentication for a certain resource. You should send an httplib.UNAUTHORIZED response to the client so he knows he has to authenticate itself. @@ -59,16 +58,9 @@ __all__ = ("digestAuth", "basicAuth", "doAuth", "checkResponse", "calculateNonce", "SUPPORTED_QOP") ################################################################################ - -try: - # Python 2.5+ - from hashlib import md5 -except ImportError: - from md5 import new as md5 - import time -import base64 -import urllib2 +from cherrypy._cpcompat import base64_decode, ntob, md5 +from cherrypy._cpcompat import parse_http_list, parse_keqv_list MD5 = "MD5" MD5_SESS = "MD5-sess" @@ -82,10 +74,10 @@ SUPPORTED_QOP = (AUTH, AUTH_INT) # doAuth # DIGEST_AUTH_ENCODERS = { - MD5: lambda val: md5(val).hexdigest(), + MD5: lambda val: md5(ntob(val)).hexdigest(), 'md5': lambda val:md5(val).hexdigest(), # Added by Kovid - MD5_SESS: lambda val: md5(val).hexdigest(), -# SHA: lambda val: sha(val).hexdigest(), + MD5_SESS: lambda val: md5(ntob(val)).hexdigest(), +# SHA: lambda val: sha.new(ntob(val)).hexdigest (), } def calculateNonce (realm, algorithm = MD5): @@ -137,32 +129,32 @@ def doAuth (realm): # def _parseDigestAuthorization (auth_params): # Convert the auth params to a dict - items = urllib2.parse_http_list (auth_params) - params = urllib2.parse_keqv_list (items) + items = parse_http_list(auth_params) + params = parse_keqv_list(items) # Now validate the params # Check for required parameters required = ["username", "realm", "nonce", "uri", "response"] for k in required: - if not params.has_key(k): + if k not in params: return None # If qop is sent then cnonce and nc MUST be present - if params.has_key("qop") and not (params.has_key("cnonce") \ - and params.has_key("nc")): + if "qop" in params and not ("cnonce" in params \ + and "nc" in params): return None # If qop is not sent, neither cnonce nor nc can be present - if (params.has_key("cnonce") or params.has_key("nc")) and \ - not params.has_key("qop"): + if ("cnonce" in params or "nc" in params) and \ + "qop" not in params: return None return params def _parseBasicAuthorization (auth_params): - username, password = base64.decodestring (auth_params).split (":", 1) + username, password = base64_decode(auth_params).split(":", 1) return {"username": username, "password": password} AUTH_SCHEMES = { @@ -343,19 +335,18 @@ def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs): other arguments that each implementation might need. 
If the response is of type 'Basic' then the function has the following
- signature:
+ signature::
- checkBasicResponse (auth_map, password) -> bool
+ checkBasicResponse (auth_map, password) -> bool
If the response is of type 'Digest' then the function has the following
- signature:
+ signature::
- checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool
+ checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool
The 'A1' argument is only used in MD5_SESS algorithm based responses.
Check md5SessionKey() for more info.
"""
- global AUTH_RESPONSES
checker = AUTH_RESPONSES[auth_map["auth_scheme"]]
return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs)
diff --git a/src/cherrypy/lib/httputil.py b/src/cherrypy/lib/httputil.py
new file mode 100644
index 0000000000..5f77d54748
--- /dev/null
+++ b/src/cherrypy/lib/httputil.py
@@ -0,0 +1,506 @@
+"""HTTP library functions.
+
+This module contains functions for building an HTTP application
+framework: any one, not just one whose name starts with "Ch". ;) If you
+reference any modules from some popular framework inside *this* module,
+FuManChu will personally hang you up by your thumbs and submit you
+to a public caning.
+"""
+
+from binascii import b2a_base64
+from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou, reversed, sorted
+from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr, unicodestr, unquote_qs
+response_codes = BaseHTTPRequestHandler.responses.copy()
+
+# From http://www.cherrypy.org/ticket/361
+response_codes[500] = ('Internal Server Error',
+ 'The server encountered an unexpected condition '
+ 'which prevented it from fulfilling the request.')
+response_codes[503] = ('Service Unavailable',
+ 'The server is currently unable to handle the '
+ 'request due to a temporary overloading or '
+ 'maintenance of the server.')
+
+import re
+import urllib
+
+
+
+def urljoin(*atoms):
+ """Return the given path \*atoms, joined into a single URL.
+
+ This will correctly join a SCRIPT_NAME and PATH_INFO into the
+ original URL, even if either atom is blank.
+ """
+ url = "/".join([x for x in atoms if x])
+ while "//" in url:
+ url = url.replace("//", "/")
+ # Special-case the final url of "", and return "/" instead.
+ return url or "/"
+
+def urljoin_bytes(*atoms):
+ """Return the given path *atoms, joined into a single URL.
+
+ This will correctly join a SCRIPT_NAME and PATH_INFO into the
+ original URL, even if either atom is blank.
+ """
+ url = ntob("/").join([x for x in atoms if x])
+ while ntob("//") in url:
+ url = url.replace(ntob("//"), ntob("/"))
+ # Special-case the final url of "", and return "/" instead.
+ return url or ntob("/")
+
+def protocol_from_http(protocol_str):
+ """Return a protocol tuple from the given 'HTTP/x.y' string."""
+ return int(protocol_str[5]), int(protocol_str[7])
+
+def get_ranges(headervalue, content_length):
+ """Return a list of (start, stop) indices from a Range header, or None.
+
+ Each (start, stop) tuple will be composed of two ints, which are suitable
+ for use in a slicing operation. That is, the header "Range: bytes=3-6",
+ if applied against a Python string, is requesting resource[3:7]. This
+ function will return the list [(3, 7)].
+
+ If this function returns an empty list, you should return HTTP 416.
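+
+ A sketch of the expected behaviour (an added illustration with
+ hypothetical values; only the (3, 7) case comes from the text above)::
+
+ get_ranges("bytes=3-6", 8) # -> [(3, 7)]
+ get_ranges("bytes=-5", 8) # -> [(3, 8)] (the last five bytes)
+ get_ranges("bytes=10-", 8) # -> [] (unsatisfiable; send 416)
+ get_ranges(None, 8) # -> None (no Range header at all)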
+ """ + + if not headervalue: + return None + + result = [] + bytesunit, byteranges = headervalue.split("=", 1) + for brange in byteranges.split(","): + start, stop = [x.strip() for x in brange.split("-", 1)] + if start: + if not stop: + stop = content_length - 1 + start, stop = int(start), int(stop) + if start >= content_length: + # From rfc 2616 sec 14.16: + # "If the server receives a request (other than one + # including an If-Range request-header field) with an + # unsatisfiable Range request-header field (that is, + # all of whose byte-range-spec values have a first-byte-pos + # value greater than the current length of the selected + # resource), it SHOULD return a response code of 416 + # (Requested range not satisfiable)." + continue + if stop < start: + # From rfc 2616 sec 14.16: + # "If the server ignores a byte-range-spec because it + # is syntactically invalid, the server SHOULD treat + # the request as if the invalid Range header field + # did not exist. (Normally, this means return a 200 + # response containing the full entity)." + return None + result.append((start, stop + 1)) + else: + if not stop: + # See rfc quote above. + return None + # Negative subscript (last N bytes) + result.append((content_length - int(stop), content_length)) + + return result + + +class HeaderElement(object): + """An element (with parameters) from an HTTP header's element list.""" + + def __init__(self, value, params=None): + self.value = value + if params is None: + params = {} + self.params = params + + def __cmp__(self, other): + return cmp(self.value, other.value) + + def __lt__(self, other): + return self.value < other.value + + def __str__(self): + p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)] + return "%s%s" % (self.value, "".join(p)) + + def __bytes__(self): + return ntob(self.__str__()) + + def __unicode__(self): + return ntou(self.__str__()) + + def parse(elementstr): + """Transform 'token;key=val' to ('token', {'key': 'val'}).""" + # Split the element into a value and parameters. The 'value' may + # be of the form, "token=token", but we don't split that here. + atoms = [x.strip() for x in elementstr.split(";") if x.strip()] + if not atoms: + initial_value = '' + else: + initial_value = atoms.pop(0).strip() + params = {} + for atom in atoms: + atom = [x.strip() for x in atom.split("=", 1) if x.strip()] + key = atom.pop(0) + if atom: + val = atom[0] + else: + val = "" + params[key] = val + return initial_value, params + parse = staticmethod(parse) + + def from_str(cls, elementstr): + """Construct an instance from a string of the form 'token;key=val'.""" + ival, params = cls.parse(elementstr) + return cls(ival, params) + from_str = classmethod(from_str) + + +q_separator = re.compile(r'; *q *=') + +class AcceptElement(HeaderElement): + """An element (with parameters) from an Accept* header's element list. + + AcceptElement objects are comparable; the more-preferred object will be + "less than" the less-preferred object. They are also therefore sortable; + if you sort a list of AcceptElement objects, they will be listed in + priority order; the most preferred value will be first. Yes, it should + have been the other way around, but it's too late to fix now. + """ + + def from_str(cls, elementstr): + qvalue = None + # The first "q" parameter (if any) separates the initial + # media-range parameter(s) (if any) from the accept-params. + atoms = q_separator.split(elementstr, 1) + media_range = atoms.pop(0).strip() + if atoms: + # The qvalue for an Accept header can have extensions. 
The other + # headers cannot, but it's easier to parse them as if they did. + qvalue = HeaderElement.from_str(atoms[0].strip()) + + media_type, params = cls.parse(media_range) + if qvalue is not None: + params["q"] = qvalue + return cls(media_type, params) + from_str = classmethod(from_str) + + def qvalue(self): + val = self.params.get("q", "1") + if isinstance(val, HeaderElement): + val = val.value + return float(val) + qvalue = property(qvalue, doc="The qvalue, or priority, of this value.") + + def __cmp__(self, other): + diff = cmp(self.qvalue, other.qvalue) + if diff == 0: + diff = cmp(str(self), str(other)) + return diff + + def __lt__(self, other): + if self.qvalue == other.qvalue: + return str(self) < str(other) + else: + return self.qvalue < other.qvalue + + +def header_elements(fieldname, fieldvalue): + """Return a sorted HeaderElement list from a comma-separated header string.""" + if not fieldvalue: + return [] + + result = [] + for element in fieldvalue.split(","): + if fieldname.startswith("Accept") or fieldname == 'TE': + hv = AcceptElement.from_str(element) + else: + hv = HeaderElement.from_str(element) + result.append(hv) + + return list(reversed(sorted(result))) + +def decode_TEXT(value): + r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").""" + try: + # Python 3 + from email.header import decode_header + except ImportError: + from email.Header import decode_header + atoms = decode_header(value) + decodedvalue = "" + for atom, charset in atoms: + if charset is not None: + atom = atom.decode(charset) + decodedvalue += atom + return decodedvalue + +def valid_status(status): + """Return legal HTTP status Code, Reason-phrase and Message. + + The status arg must be an int, or a str that begins with an int. + + If status is an int, or a str and no reason-phrase is supplied, + a default reason-phrase will be provided. + """ + + if not status: + status = 200 + + status = str(status) + parts = status.split(" ", 1) + if len(parts) == 1: + # No reason supplied. + code, = parts + reason = None + else: + code, reason = parts + reason = reason.strip() + + try: + code = int(code) + except ValueError: + raise ValueError("Illegal response status from server " + "(%s is non-numeric)." % repr(code)) + + if code < 100 or code > 599: + raise ValueError("Illegal response status from server " + "(%s is out of range)." % repr(code)) + + if code not in response_codes: + # code is unknown but not illegal + default_reason, message = "", "" + else: + default_reason, message = response_codes[code] + + if reason is None: + reason = default_reason + + return code, reason, message + + +# NOTE: the parse_qs functions that follow are modified version of those +# in the python3.0 source - we need to pass through an encoding to the unquote +# method, but the default parse_qs function doesn't allow us to. These do. + +def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. 
+
+ Returns a dict, as G-d intended.
+ """
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ d = {}
+ for name_value in pairs:
+ if not name_value and not strict_parsing:
+ continue
+ nv = name_value.split('=', 1)
+ if len(nv) != 2:
+ if strict_parsing:
+ raise ValueError("bad query field: %r" % (name_value,))
+ # Handle case of a control-name with no equal sign
+ if keep_blank_values:
+ nv.append('')
+ else:
+ continue
+ if len(nv[1]) or keep_blank_values:
+ name = unquote_qs(nv[0], encoding)
+ value = unquote_qs(nv[1], encoding)
+ if name in d:
+ if not isinstance(d[name], list):
+ d[name] = [d[name]]
+ d[name].append(value)
+ else:
+ d[name] = value
+ return d
+
+
+image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
+
+def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'):
+ """Build a params dictionary from a query_string.
+
+ Duplicate key/value pairs in the provided query_string will be
+ returned as {'key': [val1, val2, ...]}. Single key/values will
+ be returned as strings: {'key': 'value'}.
+ """
+ if image_map_pattern.match(query_string):
+ # Server-side image map. Map the coords to 'x' and 'y'
+ # (like CGI::Request does).
+ pm = query_string.split(",")
+ pm = {'x': int(pm[0]), 'y': int(pm[1])}
+ else:
+ pm = _parse_qs(query_string, keep_blank_values, encoding=encoding)
+ return pm
+
+
+class CaseInsensitiveDict(dict):
+ """A case-insensitive dict subclass.
+
+ Each key is changed on entry to str(key).title().
+ """
+
+ def __getitem__(self, key):
+ return dict.__getitem__(self, str(key).title())
+
+ def __setitem__(self, key, value):
+ dict.__setitem__(self, str(key).title(), value)
+
+ def __delitem__(self, key):
+ dict.__delitem__(self, str(key).title())
+
+ def __contains__(self, key):
+ return dict.__contains__(self, str(key).title())
+
+ def get(self, key, default=None):
+ return dict.get(self, str(key).title(), default)
+
+ if hasattr({}, 'has_key'):
+ def has_key(self, key):
+ return dict.has_key(self, str(key).title())
+
+ def update(self, E):
+ for k in E.keys():
+ self[str(k).title()] = E[k]
+
+ def fromkeys(cls, seq, value=None):
+ newdict = cls()
+ for k in seq:
+ newdict[str(k).title()] = value
+ return newdict
+ fromkeys = classmethod(fromkeys)
+
+ def setdefault(self, key, x=None):
+ key = str(key).title()
+ try:
+ return self[key]
+ except KeyError:
+ self[key] = x
+ return x
+
+ def pop(self, key, default):
+ return dict.pop(self, str(key).title(), default)
+
+
+# TEXT = <any OCTET except CTLs, but including LWS>
+#
+# A CRLF is allowed in the definition of TEXT only as part of a header
+# field continuation. It is expected that the folding LWS will be
+# replaced with a single SP before interpretation of the TEXT value."
+if nativestr == bytestr:
+ header_translate_table = ''.join([chr(i) for i in xrange(256)])
+ header_translate_deletechars = ''.join([chr(i) for i in xrange(32)]) + chr(127)
+else:
+ header_translate_table = None
+ header_translate_deletechars = bytes(range(32)) + bytes([127])
+
+
+class HeaderMap(CaseInsensitiveDict):
+ """A dict subclass for HTTP request and response headers.
+
+ Each key is changed on entry to str(key).title(). This allows headers
+ to be case-insensitive and avoid duplicates.
+
+ Values are header values (decoded according to :rfc:`2047` if necessary).
+ """
+
+ protocol=(1, 1)
+ encodings = ["ISO-8859-1"]
+
+ # Someday, when http-bis is done, this will probably get dropped
+ # since few servers, clients, or intermediaries do it. But until then,
+ # we're going to obey the spec as is.
+ # "Words of *TEXT MAY contain characters from character sets other than + # ISO-8859-1 only when encoded according to the rules of RFC 2047." + use_rfc_2047 = True + + def elements(self, key): + """Return a sorted list of HeaderElements for the given header.""" + key = str(key).title() + value = self.get(key) + return header_elements(key, value) + + def values(self, key): + """Return a sorted list of HeaderElement.value for the given header.""" + return [e.value for e in self.elements(key)] + + def output(self): + """Transform self into a list of (name, value) tuples.""" + header_list = [] + for k, v in self.items(): + if isinstance(k, unicodestr): + k = self.encode(k) + + if not isinstance(v, basestring): + v = str(v) + + if isinstance(v, unicodestr): + v = self.encode(v) + + # See header_translate_* constants above. + # Replace only if you really know what you're doing. + k = k.translate(header_translate_table, header_translate_deletechars) + v = v.translate(header_translate_table, header_translate_deletechars) + + header_list.append((k, v)) + return header_list + + def encode(self, v): + """Return the given header name or value, encoded for HTTP output.""" + for enc in self.encodings: + try: + return v.encode(enc) + except UnicodeEncodeError: + continue + + if self.protocol == (1, 1) and self.use_rfc_2047: + # Encode RFC-2047 TEXT + # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?="). + # We do our own here instead of using the email module + # because we never want to fold lines--folding has + # been deprecated by the HTTP working group. + v = b2a_base64(v.encode('utf-8')) + return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?=')) + + raise ValueError("Could not encode header part %r using " + "any of the encodings %r." % + (v, self.encodings)) + + +class Host(object): + """An internet address. + + name + Should be the client's host name. If not available (because no DNS + lookup is performed), the IP address should be used instead. + + """ + + ip = "0.0.0.0" + port = 80 + name = "unknown.tld" + + def __init__(self, ip, port, name=None): + self.ip = ip + self.port = port + if name is None: + name = ip + self.name = name + + def __repr__(self): + return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name) diff --git a/src/cherrypy/lib/jsontools.py b/src/cherrypy/lib/jsontools.py new file mode 100644 index 0000000000..209257914f --- /dev/null +++ b/src/cherrypy/lib/jsontools.py @@ -0,0 +1,87 @@ +import sys +import cherrypy +from cherrypy._cpcompat import basestring, ntou, json, json_encode, json_decode + +def json_processor(entity): + """Read application/json data into request.json.""" + if not entity.headers.get(ntou("Content-Length"), ntou("")): + raise cherrypy.HTTPError(411) + + body = entity.fp.read() + try: + cherrypy.serving.request.json = json_decode(body.decode('utf-8')) + except ValueError: + raise cherrypy.HTTPError(400, 'Invalid JSON document') + +def json_in(content_type=[ntou('application/json'), ntou('text/javascript')], + force=True, debug=False, processor = json_processor): + """Add a processor to parse JSON request entities: + The default processor places the parsed data into request.json. + + Incoming request entities which match the given content_type(s) will + be deserialized from JSON to the Python equivalent, and the result + stored at cherrypy.request.json. The 'content_type' argument may + be a Content-Type string or a list of allowable Content-Type strings. 
+ + If the 'force' argument is True (the default), then entities of other + content types will not be allowed; "415 Unsupported Media Type" is + raised instead. + + Supply your own processor to use a custom decoder, or to handle the parsed + data differently. The processor can be configured via + tools.json_in.processor or via the decorator method. + + Note that the deserializer requires the client send a Content-Length + request header, or it will raise "411 Length Required". If for any + other reason the request entity cannot be deserialized from JSON, + it will raise "400 Bad Request: Invalid JSON document". + + You must be using Python 2.6 or greater, or have the 'simplejson' + package importable; otherwise, ValueError is raised during processing. + """ + request = cherrypy.serving.request + if isinstance(content_type, basestring): + content_type = [content_type] + + if force: + if debug: + cherrypy.log('Removing body processors %s' % + repr(request.body.processors.keys()), 'TOOLS.JSON_IN') + request.body.processors.clear() + request.body.default_proc = cherrypy.HTTPError( + 415, 'Expected an entity of content type %s' % + ', '.join(content_type)) + + for ct in content_type: + if debug: + cherrypy.log('Adding body processor for %s' % ct, 'TOOLS.JSON_IN') + request.body.processors[ct] = processor + +def json_handler(*args, **kwargs): + value = cherrypy.serving.request._json_inner_handler(*args, **kwargs) + return json_encode(value) + +def json_out(content_type='application/json', debug=False, handler=json_handler): + """Wrap request.handler to serialize its output to JSON. Sets Content-Type. + + If the given content_type is None, the Content-Type response header + is not set. + + Provide your own handler to use a custom encoder. For example + cherrypy.config['tools.json_out.handler'] = , or + @json_out(handler=function). + + You must be using Python 2.6 or greater, or have the 'simplejson' + package importable; otherwise, ValueError is raised during processing. + """ + request = cherrypy.serving.request + if debug: + cherrypy.log('Replacing %s with JSON handler' % request.handler, + 'TOOLS.JSON_OUT') + request._json_inner_handler = request.handler + request.handler = handler + if content_type is not None: + if debug: + cherrypy.log('Setting Content-Type to %s' % content_type, 'TOOLS.JSON_OUT') + cherrypy.serving.response.headers['Content-Type'] = content_type + diff --git a/src/cherrypy/lib/profiler.py b/src/cherrypy/lib/profiler.py index 704fec47a5..785d58a302 100644 --- a/src/cherrypy/lib/profiler.py +++ b/src/cherrypy/lib/profiler.py @@ -3,7 +3,7 @@ CherryPy users ============== -You can profile any of your pages as follows: +You can profile any of your pages as follows:: from cherrypy.lib import profiler @@ -19,25 +19,23 @@ You can profile any of your pages as follows: cherrypy.tree.mount(Root()) - You can also turn on profiling for all requests -using the make_app function as WSGI middleware. - +using the ``make_app`` function as WSGI middleware. CherryPy developers =================== This module can be used whenever you make changes to CherryPy, to get a quick sanity-check on overall CP performance. Use the -"--profile" flag when running the test suite. Then, use the serve() +``--profile`` flag when running the test suite. Then, use the ``serve()`` function to browse the results in a web browser. If you run this -module from the command line, it will call serve() for you. +module from the command line, it will call ``serve()`` for you. 
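+
+A hedged example, assuming the module's ``__main__`` hook passes its
+command-line arguments straight through to ``serve(path, port)``::
+
+ python -m cherrypy.lib.profiler /tmp/cp_profiles 8080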
""" -# Make profiler output more readable by adding __init__ modules' parents. def new_func_strip_path(func_name): + """Make profiler output more readable by adding ``__init__`` modules' parents""" filename, line, name = func_name if filename.endswith("__init__.py"): return os.path.basename(filename[:-12]) + filename[-12:], line, name @@ -50,21 +48,12 @@ try: except ImportError: profile = None pstats = None - import warnings - msg = ("Your installation of Python does not have a profile module. " - "If you're on Debian, you can apt-get python2.4-profiler from " - "non-free in a separate step. See http://www.cherrypy.org/wiki/" - "ProfilingOnDebian for details.") - warnings.warn(msg) import os, os.path import sys +import warnings -try: - import cStringIO as StringIO -except ImportError: - import StringIO - +from cherrypy._cpcompat import BytesIO _count = 0 @@ -88,13 +77,15 @@ class Profiler(object): return result def statfiles(self): - """statfiles() -> list of available profiles.""" + """:rtype: list of available profiles. + """ return [f for f in os.listdir(self.path) if f.startswith("cp_") and f.endswith(".prof")] def stats(self, filename, sortby='cumulative'): - """stats(index) -> output of print_stats() for the given profile.""" - sio = StringIO.StringIO() + """:rtype stats(index): output of print_stats() for the given profile. + """ + sio = BytesIO() if sys.version_info >= (2, 5): s = pstats.Stats(os.path.join(self.path, filename), stream=sio) s.strip_dirs() @@ -162,13 +153,25 @@ class make_app: def __init__(self, nextapp, path=None, aggregate=False): """Make a WSGI middleware app which wraps 'nextapp' with profiling. - nextapp: the WSGI application to wrap, usually an instance of + nextapp + the WSGI application to wrap, usually an instance of cherrypy.Application. - path: where to dump the profiling output. - aggregate: if True, profile data for all HTTP requests will go in + + path + where to dump the profiling output. + + aggregate + if True, profile data for all HTTP requests will go in a single file. If False (the default), each HTTP request will dump its profile data into a separate file. + """ + if profile is None or pstats is None: + msg = ("Your installation of Python does not have a profile module. " + "If you're on Debian, try `sudo apt-get install python-profiler`. " + "See http://www.cherrypy.org/wiki/ProfilingOnDebian for details.") + warnings.warn(msg) + self.nextapp = nextapp self.aggregate = aggregate if aggregate: @@ -186,6 +189,12 @@ class make_app: def serve(path=None, port=8080): + if profile is None or pstats is None: + msg = ("Your installation of Python does not have a profile module. " + "If you're on Debian, try `sudo apt-get install python-profiler`. " + "See http://www.cherrypy.org/wiki/ProfilingOnDebian for details.") + warnings.warn(msg) + import cherrypy cherrypy.config.update({'server.socket_port': int(port), 'server.thread_pool': 10, diff --git a/src/cherrypy/lib/reprconf.py b/src/cherrypy/lib/reprconf.py new file mode 100644 index 0000000000..ba8ff51e41 --- /dev/null +++ b/src/cherrypy/lib/reprconf.py @@ -0,0 +1,485 @@ +"""Generic configuration system using unrepr. + +Configuration data may be supplied as a Python dictionary, as a filename, +or as an open file object. When you supply a filename or file, Python's +builtin ConfigParser is used (with some extensions). + +Namespaces +---------- + +Configuration keys are separated into namespaces by the first "." in the key. + +The only key that cannot exist in a namespace is the "environment" entry. 
+This special entry 'imports' other config entries from a template stored in +the Config.environments dict. + +You can define your own namespaces to be called when new config is merged +by adding a named handler to Config.namespaces. The name can be any string, +and the handler must be either a callable or a context manager. +""" + +try: + # Python 3.0+ + from configparser import ConfigParser +except ImportError: + from ConfigParser import ConfigParser + +try: + set +except NameError: + from sets import Set as set + +try: + basestring +except NameError: + basestring = str + +try: + # Python 3 + import builtins +except ImportError: + # Python 2 + import __builtin__ as builtins + +import operator as _operator +import sys + +def as_dict(config): + """Return a dict from 'config' whether it is a dict, file, or filename.""" + if isinstance(config, basestring): + config = Parser().dict_from_file(config) + elif hasattr(config, 'read'): + config = Parser().dict_from_file(config) + return config + + +class NamespaceSet(dict): + """A dict of config namespace names and handlers. + + Each config entry should begin with a namespace name; the corresponding + namespace handler will be called once for each config entry in that + namespace, and will be passed two arguments: the config key (with the + namespace removed) and the config value. + + Namespace handlers may be any Python callable; they may also be + Python 2.5-style 'context managers', in which case their __enter__ + method should return a callable to be used as the handler. + See cherrypy.tools (the Toolbox class) for an example. + """ + + def __call__(self, config): + """Iterate through config and pass it to each namespace handler. + + config + A flat dict, where keys use dots to separate + namespaces, and values are arbitrary. + + The first name in each config key is used to look up the corresponding + namespace handler. For example, a config entry of {'tools.gzip.on': v} + will call the 'tools' namespace handler with the args: ('gzip.on', v) + """ + # Separate the given config into namespaces + ns_confs = {} + for k in config: + if "." in k: + ns, name = k.split(".", 1) + bucket = ns_confs.setdefault(ns, {}) + bucket[name] = config[k] + + # I chose __enter__ and __exit__ so someday this could be + # rewritten using Python 2.5's 'with' statement: + # for ns, handler in self.iteritems(): + # with handler as callable: + # for k, v in ns_confs.get(ns, {}).iteritems(): + # callable(k, v) + for ns, handler in self.items(): + exit = getattr(handler, "__exit__", None) + if exit: + callable = handler.__enter__() + no_exc = True + try: + try: + for k, v in ns_confs.get(ns, {}).items(): + callable(k, v) + except: + # The exceptional case is handled here + no_exc = False + if exit is None: + raise + if not exit(*sys.exc_info()): + raise + # The exception is swallowed if exit() returns true + finally: + # The normal and non-local-goto cases are handled here + if no_exc and exit: + exit(None, None, None) + else: + for k, v in ns_confs.get(ns, {}).items(): + handler(k, v) + + def __repr__(self): + return "%s.%s(%s)" % (self.__module__, self.__class__.__name__, + dict.__repr__(self)) + + def __copy__(self): + newobj = self.__class__() + newobj.update(self) + return newobj + copy = __copy__ + + +class Config(dict): + """A dict-like set of configuration data, with defaults and namespaces. + + May take a file, filename, or dict. 
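+
+ A usage sketch (the filename and keys are illustrative only)::
+
+ cfg = Config('site.conf') # parse an INI file
+ cfg.update({'tools.gzip.on': True}) # merge a plain dict
+ cfg['server.socket_port'] = 8080 # single keys work too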
+ """ + + defaults = {} + environments = {} + namespaces = NamespaceSet() + + def __init__(self, file=None, **kwargs): + self.reset() + if file is not None: + self.update(file) + if kwargs: + self.update(kwargs) + + def reset(self): + """Reset self to default values.""" + self.clear() + dict.update(self, self.defaults) + + def update(self, config): + """Update self from a dict, file or filename.""" + if isinstance(config, basestring): + # Filename + config = Parser().dict_from_file(config) + elif hasattr(config, 'read'): + # Open file object + config = Parser().dict_from_file(config) + else: + config = config.copy() + self._apply(config) + + def _apply(self, config): + """Update self from a dict.""" + which_env = config.get('environment') + if which_env: + env = self.environments[which_env] + for k in env: + if k not in config: + config[k] = env[k] + + dict.update(self, config) + self.namespaces(config) + + def __setitem__(self, k, v): + dict.__setitem__(self, k, v) + self.namespaces({k: v}) + + +class Parser(ConfigParser): + """Sub-class of ConfigParser that keeps the case of options and that + raises an exception if the file cannot be read. + """ + + def optionxform(self, optionstr): + return optionstr + + def read(self, filenames): + if isinstance(filenames, basestring): + filenames = [filenames] + for filename in filenames: + # try: + # fp = open(filename) + # except IOError: + # continue + fp = open(filename) + try: + self._read(fp, filename) + finally: + fp.close() + + def as_dict(self, raw=False, vars=None): + """Convert an INI file to a dictionary""" + # Load INI file into a dict + result = {} + for section in self.sections(): + if section not in result: + result[section] = {} + for option in self.options(section): + value = self.get(section, option, raw=raw, vars=vars) + try: + value = unrepr(value) + except Exception: + x = sys.exc_info()[1] + msg = ("Config error in section: %r, option: %r, " + "value: %r. Config values must be valid Python." % + (section, option, value)) + raise ValueError(msg, x.__class__.__name__, x.args) + result[section][option] = value + return result + + def dict_from_file(self, file): + if hasattr(file, 'read'): + self.readfp(file) + else: + self.read(file) + return self.as_dict() + + +# public domain "unrepr" implementation, found on the web and then improved. + + +class _Builder2: + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise TypeError("unrepr does not recognize %s" % + repr(o.__class__.__name__)) + return m(o) + + def astnode(self, s): + """Return a Python2 ast Node compiled from a string.""" + try: + import compiler + except ImportError: + # Fallback to eval when compiler package is not available, + # e.g. IronPython 1.0. 
+ return eval(s) + + p = compiler.parse("__tempvalue__ = " + s) + return p.getChildren()[1].getChildren()[0].getChildren()[1] + + def build_Subscript(self, o): + expr, flags, subs = o.getChildren() + expr = self.build(expr) + subs = self.build(subs) + return expr[subs] + + def build_CallFunc(self, o): + children = map(self.build, o.getChildren()) + callee = children.pop(0) + kwargs = children.pop() or {} + starargs = children.pop() or () + args = tuple(children) + tuple(starargs) + return callee(*args, **kwargs) + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + name = o.name + if name == 'None': + return None + if name == 'True': + return True + if name == 'False': + return False + + # See if the Name is a package or module. If it is, import it. + try: + return modules(name) + except ImportError: + pass + + # See if the Name is in builtins. + try: + return getattr(builtins, name) + except AttributeError: + pass + + raise TypeError("unrepr could not resolve the name %s" % repr(name)) + + def build_Add(self, o): + left, right = map(self.build, o.getChildren()) + return left + right + + def build_Mul(self, o): + left, right = map(self.build, o.getChildren()) + return left * right + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_NoneType(self, o): + return None + + def build_UnarySub(self, o): + return -self.build(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build(o.getChildren()[0]) + + +class _Builder3: + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise TypeError("unrepr does not recognize %s" % + repr(o.__class__.__name__)) + return m(o) + + def astnode(self, s): + """Return a Python3 ast Node compiled from a string.""" + try: + import ast + except ImportError: + # Fallback to eval when ast package is not available, + # e.g. IronPython 1.0. + return eval(s) + + p = ast.parse("__tempvalue__ = " + s) + return p.body[0].value + + def build_Subscript(self, o): + return self.build(o.value)[self.build(o.slice)] + + def build_Index(self, o): + return self.build(o.value) + + def build_Call(self, o): + callee = self.build(o.func) + + if o.args is None: + args = () + else: + args = tuple([self.build(a) for a in o.args]) + + if o.starargs is None: + starargs = () + else: + starargs = self.build(o.starargs) + + if o.kwargs is None: + kwargs = {} + else: + kwargs = self.build(o.kwargs) + + return callee(*(args + starargs), **kwargs) + + def build_List(self, o): + return list(map(self.build, o.elts)) + + def build_Str(self, o): + return o.s + + def build_Num(self, o): + return o.n + + def build_Dict(self, o): + return dict([(self.build(k), self.build(v)) + for k, v in zip(o.keys, o.values)]) + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + name = o.id + if name == 'None': + return None + if name == 'True': + return True + if name == 'False': + return False + + # See if the Name is a package or module. If it is, import it. + try: + return modules(name) + except ImportError: + pass + + # See if the Name is in builtins. 
+ try: + import builtins + return getattr(builtins, name) + except AttributeError: + pass + + raise TypeError("unrepr could not resolve the name %s" % repr(name)) + + def build_UnaryOp(self, o): + op, operand = map(self.build, [o.op, o.operand]) + return op(operand) + + def build_BinOp(self, o): + left, op, right = map(self.build, [o.left, o.op, o.right]) + return op(left, right) + + def build_Add(self, o): + return _operator.add + + def build_Mult(self, o): + return _operator.mul + + def build_USub(self, o): + return _operator.neg + + def build_Attribute(self, o): + parent = self.build(o.value) + return getattr(parent, o.attr) + + def build_NoneType(self, o): + return None + + +def unrepr(s): + """Return a Python object compiled from a string.""" + if not s: + return s + if sys.version_info < (3, 0): + b = _Builder2() + else: + b = _Builder3() + obj = b.astnode(s) + return b.build(obj) + + +def modules(modulePath): + """Load a module and retrieve a reference to that module.""" + try: + mod = sys.modules[modulePath] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(modulePath, globals(), locals(), ['']) + return mod + +def attributes(full_attribute_name): + """Load a module and retrieve an attribute of that module.""" + + # Parse out the path, module, and attribute + last_dot = full_attribute_name.rfind(".") + attr_name = full_attribute_name[last_dot + 1:] + mod_path = full_attribute_name[:last_dot] + + mod = modules(mod_path) + # Let an AttributeError propagate outward. + try: + attr = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + # Return a reference to the attribute. + return attr + + diff --git a/src/cherrypy/lib/safemime.py b/src/cherrypy/lib/safemime.py deleted file mode 100644 index 0d13ae9a91..0000000000 --- a/src/cherrypy/lib/safemime.py +++ /dev/null @@ -1,128 +0,0 @@ -import cherrypy - - -class MultipartWrapper(object): - """Wraps a file-like object, returning '' when Content-Length is reached. - - The cgi module's logic for reading multipart MIME messages doesn't - allow the parts to know when the Content-Length for the entire message - has been reached, and doesn't allow for multipart-MIME messages that - omit the trailing CRLF (Flash 8's FileReference.upload(url), for example, - does this). The read_lines_to_outerboundary function gets stuck in a loop - until the socket times out. - - This rfile wrapper simply monitors the incoming stream. When a read is - attempted past the Content-Length, it returns an empty string rather - than timing out (of course, if the last read *overlaps* the C-L, you'll - get the last bit of data up to C-L, and then the next read will return - an empty string). - """ - - def __init__(self, rfile, clen): - self.rfile = rfile - self.clen = clen - self.bytes_read = 0 - - def read(self, size = None): - if self.clen: - # Return '' if we've read all the data. - if self.bytes_read >= self.clen: - return '' - - # Reduce 'size' if it's over our limit. - new_bytes_read = self.bytes_read + size - if new_bytes_read > self.clen: - size = self.clen - self.bytes_read - - data = self.rfile.read(size) - self.bytes_read += len(data) - return data - - def readline(self, size = None): - if size is not None: - if self.clen: - # Return '' if we've read all the data. - if self.bytes_read >= self.clen: - return '' - - # Reduce 'size' if it's over our limit. 
- new_bytes_read = self.bytes_read + size - if new_bytes_read > self.clen: - size = self.clen - self.bytes_read - - data = self.rfile.readline(size) - self.bytes_read += len(data) - return data - - # User didn't specify a size ... - # We read the line in chunks to make sure it's not a 100MB line ! - res = [] - size = 256 - while True: - if self.clen: - # Return if we've read all the data. - if self.bytes_read >= self.clen: - return ''.join(res) - - # Reduce 'size' if it's over our limit. - new_bytes_read = self.bytes_read + size - if new_bytes_read > self.clen: - size = self.clen - self.bytes_read - - data = self.rfile.readline(size) - self.bytes_read += len(data) - res.append(data) - # See http://www.cherrypy.org/ticket/421 - if len(data) < size or data[-1:] == "\n": - return ''.join(res) - - def readlines(self, sizehint = 0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self.rfile - - def next(self): - if self.clen: - # Return '' if we've read all the data. - if self.bytes_read >= self.clen: - return '' - - data = self.rfile.next() - self.bytes_read += len(data) - return data - - -def safe_multipart(flash_only=False): - """Wrap request.rfile in a reader that won't crash on no trailing CRLF.""" - h = cherrypy.request.headers - if not h.get('Content-Type','').startswith('multipart/'): - return - if flash_only and not 'Shockwave Flash' in h.get('User-Agent', ''): - return - - clen = h.get('Content-Length', '0') - try: - clen = int(clen) - except ValueError: - return - cherrypy.request.rfile = MultipartWrapper(cherrypy.request.rfile, clen) - -def init(): - """Create a Tool for safe_multipart and add it to cherrypy.tools.""" - cherrypy.tools.safe_multipart = cherrypy.Tool('before_request_body', - safe_multipart) - diff --git a/src/cherrypy/lib/sessions.py b/src/cherrypy/lib/sessions.py index 326e72c2b2..1cd651c4f2 100644 --- a/src/cherrypy/lib/sessions.py +++ b/src/cherrypy/lib/sessions.py @@ -1,32 +1,99 @@ """Session implementation for CherryPy. -We use cherrypy.request to store some convenient variables as -well as data about the session for the current request. Instead of -polluting cherrypy.request we use a Session object bound to -cherrypy.session to store these variables. +You need to edit your config file to use sessions. Here's an example:: + + [/] + tools.sessions.on = True + tools.sessions.storage_type = "file" + tools.sessions.storage_path = "/home/site/sessions" + tools.sessions.timeout = 60 + +This sets the session to be stored in files in the directory /home/site/sessions, +and the session timeout to 60 minutes. If you omit ``storage_type`` the sessions +will be saved in RAM. ``tools.sessions.on`` is the only required line for +working sessions, the rest are optional. + +By default, the session ID is passed in a cookie, so the client's browser must +have cookies enabled for your site. + +To set data for the current session, use +``cherrypy.session['fieldname'] = 'fieldvalue'``; +to get data use ``cherrypy.session.get('fieldname')``. + +================ +Locking sessions +================ + +By default, the ``'locking'`` mode of sessions is ``'implicit'``, which means +the session is locked early and unlocked late. If you want to control when the +session data is locked and unlocked, set ``tools.sessions.locking = 'explicit'``. 
+Then call ``cherrypy.session.acquire_lock()`` and ``cherrypy.session.release_lock()``. +Regardless of which mode you use, the session is guaranteed to be unlocked when +the request is complete. + +================= +Expiring Sessions +================= + +You can force a session to expire with :func:`cherrypy.lib.sessions.expire`. +Simply call that function at the point you want the session to expire, and it +will cause the session cookie to expire client-side. + +=========================== +Session Fixation Protection +=========================== + +If CherryPy receives, via a request cookie, a session id that it does not +recognize, it will reject that id and create a new one to return in the +response cookie. This `helps prevent session fixation attacks +`_. +However, CherryPy "recognizes" a session id by looking up the saved session +data for that id. Therefore, if you never save any session data, +**you will get a new session id for every request**. + +================ +Sharing Sessions +================ + +If you run multiple instances of CherryPy (for example via mod_python behind +Apache prefork), you most likely cannot use the RAM session backend, since each +instance of CherryPy will have its own memory space. Use a different backend +instead, and verify that all instances are pointing at the same file or db +location. Alternately, you might try a load balancer which makes sessions +"sticky". Google is your friend, there. + +================ +Expiration Dates +================ + +The response cookie will possess an expiration date to inform the client at +which point to stop sending the cookie back in requests. If the server time +and client time differ, expect sessions to be unreliable. **Make sure the +system time of your server is accurate**. + +CherryPy defaults to a 60-minute session timeout, which also applies to the +cookie which is sent to the client. Unfortunately, some versions of Safari +("4 public beta" on Windows XP at least) appear to have a bug in their parsing +of the GMT expiration date--they appear to interpret the date as one hour in +the past. Sixty minutes minus one hour is pretty close to zero, so you may +experience this bug as a new session id for every request, unless the requests +are less than one second apart. To fix, try increasing the session.timeout. + +On the other extreme, some users report Firefox sending cookies after their +expiration date, although this was on a system with an inaccurate system time. +Maybe FF doesn't trust system time. """ import datetime import os -try: - import cPickle as pickle -except ImportError: - import pickle -import random - -try: - # Python 2.5+ - from hashlib import sha1 as sha -except ImportError: - from sha import new as sha - import time import threading import types from warnings import warn import cherrypy -from cherrypy.lib import http +from cherrypy._cpcompat import copyitems, pickle, random20, unicodestr +from cherrypy.lib import httputil missing = object() @@ -34,59 +101,90 @@ missing = object() class Session(object): """A CherryPy dict-like Session object (one per request).""" - __metaclass__ = cherrypy._AttributeDocstrings - _id = None - id_observers = None - id_observers__doc = "A list of callbacks to which to pass new id's." - id__doc = "The current session ID." + id_observers = None + "A list of callbacks to which to pass new id's." 
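+
+ # A hedged illustration (not upstream code): any callable appended to
+ # id_observers is invoked with each newly assigned session id, e.g.
+ #
+ # cherrypy.session.id_observers.append(
+ # lambda new_id: cherrypy.log('session id is now %r' % new_id))
+ # cherrypy.session.regenerate() # observers receive the fresh id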
+ def _get_id(self): return self._id def _set_id(self, value): self._id = value for o in self.id_observers: o(value) - id = property(_get_id, _set_id, doc=id__doc) + id = property(_get_id, _set_id, doc="The current session ID.") timeout = 60 - timeout__doc = "Number of minutes after which to delete session data." + "Number of minutes after which to delete session data." locked = False - locked__doc = """ + """ If True, this session instance has exclusive read/write access to session data.""" loaded = False - loaded__doc = """ + """ If True, data has been retrieved from storage. This should happen automatically on the first attempt to access session data.""" clean_thread = None - clean_thread__doc = "Class-level Monitor which calls self.clean_up." + "Class-level Monitor which calls self.clean_up." clean_freq = 5 - clean_freq__doc = "The poll rate for expired session cleanup in minutes." + "The poll rate for expired session cleanup in minutes." + + originalid = None + "The session id passed by the client. May be missing or unsafe." + + missing = False + "True if the session requested by the client did not exist." + + regenerated = False + """ + True if the application called session.regenerate(). This is not set by + internal calls to regenerate the session id.""" + + debug=False def __init__(self, id=None, **kwargs): self.id_observers = [] self._data = {} - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(self, k, v) + self.originalid = id + self.missing = False if id is None: - self.regenerate() + if self.debug: + cherrypy.log('No id given; making a new one', 'TOOLS.SESSIONS') + self._regenerate() else: self.id = id if not self._exists(): + if self.debug: + cherrypy.log('Expired or malicious session %r; ' + 'making a new one' % id, 'TOOLS.SESSIONS') # Expired or malicious session. Make a new one. # See http://www.cherrypy.org/ticket/709. self.id = None - self.regenerate() + self.missing = True + self._regenerate() + + def now(self): + """Generate the session specific concept of 'now'. + + Other session providers can override this to use alternative, + possibly timezone aware, versions of 'now'. + """ + return datetime.datetime.now() def regenerate(self): """Replace the current session (with a new id).""" + self.regenerated = True + self._regenerate() + + def _regenerate(self): if self.id is not None: self.delete() @@ -108,26 +206,21 @@ class Session(object): """Clean up expired sessions.""" pass - try: - os.urandom(20) - except (AttributeError, NotImplementedError): - # os.urandom not available until Python 2.4. Fall back to random.random. 
- def generate_id(self): - """Return a new session id.""" - return sha('%s' % random.random()).hexdigest() - else: - def generate_id(self): - """Return a new session id.""" - return os.urandom(20).encode('hex') + def generate_id(self): + """Return a new session id.""" + return random20() def save(self): """Save session data.""" try: # If session data has never been loaded then it's never been - # accessed: no need to delete it + # accessed: no need to save it if self.loaded: t = datetime.timedelta(seconds = self.timeout * 60) - expiration_time = datetime.datetime.now() + t + expiration_time = self.now() + t + if self.debug: + cherrypy.log('Saving with expiry %s' % expiration_time, + 'TOOLS.SESSIONS') self._save(expiration_time) finally: @@ -139,8 +232,9 @@ class Session(object): """Copy stored session data into this session instance.""" data = self._load() # data is either None or a tuple (session_data, expiration_time) - if data is None or data[1] < datetime.datetime.now(): - # Expired session: flush session data + if data is None or data[1] < self.now(): + if self.debug: + cherrypy.log('Expired session, flushing data', 'TOOLS.SESSIONS') self._data = {} else: self._data = data[0] @@ -153,7 +247,8 @@ class Session(object): # clean_up is in instancemethod and not a classmethod, # so that tool config can be accessed inside the method. t = cherrypy.process.plugins.Monitor( - cherrypy.engine, self.clean_up, self.clean_freq * 60) + cherrypy.engine, self.clean_up, self.clean_freq * 60, + name='Session cleanup') t.subscribe() cls.clean_thread = t t.start() @@ -189,10 +284,11 @@ class Session(object): if not self.loaded: self.load() return key in self._data - def has_key(self, key): - """D.has_key(k) -> True if D has a key k, else False.""" - if not self.loaded: self.load() - return self._data.has_key(key) + if hasattr({}, 'has_key'): + def has_key(self, key): + """D.has_key(k) -> True if D has a key k, else False.""" + if not self.loaded: self.load() + return key in self._data def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" @@ -238,9 +334,9 @@ class RamSession(Session): def clean_up(self): """Clean up expired sessions.""" - now = datetime.datetime.now() - for id, (data, expiration_time) in self.cache.items(): - if expiration_time < now: + now = self.now() + for id, (data, expiration_time) in copyitems(self.cache): + if expiration_time <= now: try: del self.cache[id] except KeyError: @@ -250,6 +346,11 @@ class RamSession(Session): except KeyError: pass + # added to remove obsolete lock objects + for id in list(self.locks): + if id not in self.cache: + self.locks.pop(id, None) + def _exists(self): return self.id in self.cache @@ -260,7 +361,7 @@ class RamSession(Session): self.cache[self.id] = (self._data, expiration_time) def _delete(self): - del self.cache[self.id] + self.cache.pop(self.id, None) def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" @@ -280,13 +381,16 @@ class RamSession(Session): class FileSession(Session): """Implementation of the File backend for sessions - storage_path: the folder where session data will be saved. Each session + storage_path + The folder where session data will be saved. Each session will be saved as pickle.dump(data, expiration_time) in its own file; the filename will be self.SESSION_PREFIX + self.id. 
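+
+ A matching configuration sketch (the path is hypothetical)::
+
+ tools.sessions.on = True
+ tools.sessions.storage_type = "file"
+ tools.sessions.storage_path = "/var/lib/myapp/sessions"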
+ """ SESSION_PREFIX = 'session-' LOCK_SUFFIX = '.lock' + pickle_protocol = pickle.HIGHEST_PROTOCOL def __init__(self, id=None, **kwargs): # The 'storage_path' arg is required for file-based sessions. @@ -302,7 +406,7 @@ class FileSession(Session): # The 'storage_path' arg is required for file-based sessions. kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(cls, k, v) # Warn if any lock files exist at startup. @@ -342,7 +446,7 @@ class FileSession(Session): def _save(self, expiration_time): f = open(self._get_file_path(), "wb") try: - pickle.dump((self._data, expiration_time), f) + pickle.dump((self._data, expiration_time), f, self.pickle_protocol) finally: f.close() @@ -376,7 +480,7 @@ class FileSession(Session): def clean_up(self): """Clean up expired sessions.""" - now = datetime.datetime.now() + now = self.now() # Iterate over all session files in self.storage_path for fname in os.listdir(self.storage_path): if (fname.startswith(self.SESSION_PREFIX) @@ -405,7 +509,7 @@ class FileSession(Session): class PostgresqlSession(Session): """ Implementation of the PostgreSQL backend for sessions. It assumes - a table like this: + a table like this:: create table session ( id varchar(40), @@ -416,6 +520,8 @@ class PostgresqlSession(Session): You must provide your own get_db function. """ + pickle_protocol = pickle.HIGHEST_PROTOCOL + def __init__(self, id=None, **kwargs): Session.__init__(self, id, **kwargs) self.cursor = self.db.cursor() @@ -426,10 +532,10 @@ class PostgresqlSession(Session): This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(cls, k, v) - self.db = self.get_db() + cls.db = cls.get_db() setup = classmethod(setup) def __del__(self): @@ -457,7 +563,7 @@ class PostgresqlSession(Session): return data, expiration_time def _save(self, expiration_time): - pickled_data = pickle.dumps(self._data) + pickled_data = pickle.dumps(self._data, self.pickle_protocol) self.cursor.execute('update session set data = %s, ' 'expiration_time = %s where id = %s', (pickled_data, expiration_time, self.id)) @@ -482,7 +588,7 @@ class PostgresqlSession(Session): def clean_up(self): """Clean up expired sessions.""" self.cursor.execute('delete from session where expiration_time < %s', - (datetime.datetime.now(),)) + (self.now(),)) class MemcachedSession(Session): @@ -502,13 +608,26 @@ class MemcachedSession(Session): This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): setattr(cls, k, v) import memcache cls.cache = memcache.Client(cls.servers) setup = classmethod(setup) + def _get_id(self): + return self._id + def _set_id(self, value): + # This encode() call is where we differ from the superclass. + # Memcache keys MUST be byte strings, not unicode. 
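# Why the encode() below matters: memcache protocol keys must be byte
# strings. A minimal standalone illustration (the helper name here is
# hypothetical, not part of the patch):

def memcache_safe_key(session_id):
    """Return the byte-string form of a session id for use as a cache key."""
    if not isinstance(session_id, bytes):
        session_id = session_id.encode('utf-8')
    return session_id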
+ if isinstance(value, unicodestr): + value = value.encode('utf-8') + + self._id = value + for o in self.id_observers: + o(value) + id = property(_get_id, _set_id, doc="The current session ID.") + def _exists(self): self.mc_lock.acquire() try: @@ -558,21 +677,23 @@ def save(): if not hasattr(cherrypy.serving, "session"): return + request = cherrypy.serving.request + response = cherrypy.serving.response # Guard against running twice - if hasattr(cherrypy.request, "_sessionsaved"): + if hasattr(request, "_sessionsaved"): return - cherrypy.request._sessionsaved = True + request._sessionsaved = True - if cherrypy.response.stream: + if response.stream: # If the body is being streamed, we have to save the data # *after* the response has been written out - cherrypy.request.hooks.attach('on_end_request', cherrypy.session.save) + request.hooks.attach('on_end_request', cherrypy.session.save) else: # If the body is not being streamed, we save the data now # (so we can release the lock). - if isinstance(cherrypy.response.body, types.GeneratorType): - cherrypy.response.collapse_body() + if isinstance(response.body, types.GeneratorType): + response.collapse_body() cherrypy.session.save() save.failsafe = True @@ -587,29 +708,56 @@ close.priority = 90 def init(storage_type='ram', path=None, path_header=None, name='session_id', - timeout=60, domain=None, secure=False, clean_freq=5, **kwargs): + timeout=60, domain=None, secure=False, clean_freq=5, + persistent=True, httponly=False, debug=False, **kwargs): """Initialize session object (using cookies). - storage_type: one of 'ram', 'file', 'postgresql'. This will be used - to look up the corresponding class in cherrypy.lib.sessions + storage_type + One of 'ram', 'file', 'postgresql', 'memcached'. This will be + used to look up the corresponding class in cherrypy.lib.sessions globals. For example, 'file' will use the FileSession class. - path: the 'path' value to stick in the response cookie metadata. - path_header: if 'path' is None (the default), then the response + + path + The 'path' value to stick in the response cookie metadata. + + path_header + If 'path' is None (the default), then the response cookie 'path' will be pulled from request.headers[path_header]. - name: the name of the cookie. - timeout: the expiration timeout (in minutes) for both the cookie and - stored session data. - domain: the cookie domain. - secure: if False (the default) the cookie 'secure' value will not + + name + The name of the cookie. + + timeout + The expiration timeout (in minutes) for the stored session data. + If 'persistent' is True (the default), this is also the timeout + for the cookie. + + domain + The cookie domain. + + secure + If False (the default) the cookie 'secure' value will not be set. If True, the cookie 'secure' value will be set (to 1). - clean_freq (minutes): the poll rate for expired session cleanup. + + clean_freq (minutes) + The poll rate for expired session cleanup. + + persistent + If True (the default), the 'timeout' argument will be used + to expire the cookie. If False, the cookie will not have an expiry, + and the cookie will be a "session cookie" which expires when the + browser is closed. + + httponly + If False (the default) the cookie 'httponly' value will not be set. + If True, the cookie 'httponly' value will be set (to 1). Any additional kwargs will be bound to the new Session instance, and may be specific to the storage type. See the subclass of Session you're using for more information. 
""" - request = cherrypy.request + request = cherrypy.serving.request # Guard against running twice if hasattr(request, "_session_init_flag"): @@ -620,6 +768,9 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', id = None if name in request.cookie: id = request.cookie[name].value + if debug: + cherrypy.log('ID obtained from request.cookie: %r' % id, + 'TOOLS.SESSIONS') # Find the storage class and call setup (first time only). storage_class = storage_type.title() + 'Session' @@ -634,36 +785,62 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', kwargs['timeout'] = timeout kwargs['clean_freq'] = clean_freq cherrypy.serving.session = sess = storage_class(id, **kwargs) + sess.debug = debug def update_cookie(id): """Update the cookie every time the session id changes.""" - cherrypy.response.cookie[name] = id + cherrypy.serving.response.cookie[name] = id sess.id_observers.append(update_cookie) # Create cherrypy.session which will proxy to cherrypy.serving.session if not hasattr(cherrypy, "session"): cherrypy.session = cherrypy._ThreadLocalProxy('session') + if persistent: + cookie_timeout = timeout + else: + # See http://support.microsoft.com/kb/223799/EN-US/ + # and http://support.mozilla.com/en-US/kb/Cookies + cookie_timeout = None set_response_cookie(path=path, path_header=path_header, name=name, - timeout=timeout, domain=domain, secure=secure) + timeout=cookie_timeout, domain=domain, secure=secure, + httponly=httponly) def set_response_cookie(path=None, path_header=None, name='session_id', - timeout=60, domain=None, secure=False): + timeout=60, domain=None, secure=False, httponly=False): """Set a response cookie for the client. - path: the 'path' value to stick in the response cookie metadata. - path_header: if 'path' is None (the default), then the response + path + the 'path' value to stick in the response cookie metadata. + + path_header + if 'path' is None (the default), then the response cookie 'path' will be pulled from request.headers[path_header]. - name: the name of the cookie. - timeout: the expiration timeout for the cookie. - domain: the cookie domain. - secure: if False (the default) the cookie 'secure' value will not + + name + the name of the cookie. + + timeout + the expiration timeout for the cookie. If 0 or other boolean + False, no 'expires' param will be set, and the cookie will be a + "session cookie" which expires when the browser is closed. + + domain + the cookie domain. + + secure + if False (the default) the cookie 'secure' value will not be set. If True, the cookie 'secure' value will be set (to 1). + + httponly + If False (the default) the cookie 'httponly' value will not be set. + If True, the cookie 'httponly' value will be set (to 1). 
+ """ # Set response cookie - cookie = cherrypy.response.cookie + cookie = cherrypy.serving.response.cookie cookie[name] = cherrypy.serving.session.id - cookie[name]['path'] = (path or cherrypy.request.headers.get(path_header) + cookie[name]['path'] = (path or cherrypy.serving.request.headers.get(path_header) or '/') # We'd like to use the "max-age" param as indicated in @@ -673,19 +850,22 @@ def set_response_cookie(path=None, path_header=None, name='session_id', ## cookie[name]['max-age'] = timeout * 60 if False and timeout: # Changed by Kovid, we want the user to have to # re-authenticate on browser restart - cookie[name]['expires'] = http.HTTPDate(time.time() + timeout) + e = time.time() + (timeout * 60) + cookie[name]['expires'] = httputil.HTTPDate(e) if domain is not None: cookie[name]['domain'] = domain if secure: cookie[name]['secure'] = 1 - + if httponly: + if not cookie[name].isReservedKey('httponly'): + raise ValueError("The httponly cookie token is not supported.") + cookie[name]['httponly'] = 1 def expire(): """Expire the current session cookie.""" - name = cherrypy.request.config.get('tools.sessions.name', 'session_id') + name = cherrypy.serving.request.config.get('tools.sessions.name', 'session_id') one_year = 60 * 60 * 24 * 365 - exp = time.gmtime(time.time() - one_year) - t = time.strftime("%a, %d-%b-%Y %H:%M:%S GMT", exp) - cherrypy.response.cookie[name]['expires'] = t + e = time.time() - one_year + cherrypy.serving.response.cookie[name]['expires'] = httputil.HTTPDate(e) diff --git a/src/cherrypy/lib/static.py b/src/cherrypy/lib/static.py index 2a5a9f6829..2d1423071b 100644 --- a/src/cherrypy/lib/static.py +++ b/src/cherrypy/lib/static.py @@ -1,20 +1,27 @@ +try: + from io import UnsupportedOperation +except ImportError: + UnsupportedOperation = object() +import logging import mimetypes mimetypes.init() mimetypes.types_map['.dwg']='image/x-dwg' mimetypes.types_map['.ico']='image/x-icon' +mimetypes.types_map['.bz2']='application/x-bzip2' +mimetypes.types_map['.gz']='application/x-gzip' import os import re import stat import time -import urllib import cherrypy -from cherrypy.lib import cptools, http, file_generator_limited +from cherrypy._cpcompat import ntob, unquote +from cherrypy.lib import cptools, httputil, file_generator_limited -def serve_file(path, content_type=None, disposition=None, name=None): - """Set status, headers, and body in order to serve the given file. +def serve_file(path, content_type=None, disposition=None, name=None, debug=False): + """Set status, headers, and body in order to serve the given path. The Content-Type header will be set to the content_type arg, if provided. If not provided, the Content-Type will be guessed by the file extension @@ -26,29 +33,36 @@ def serve_file(path, content_type=None, disposition=None, name=None): header will be written. """ - response = cherrypy.response + response = cherrypy.serving.response # If path is relative, users should fix it by making path absolute. # That is, CherryPy should not guess where the application root is. # It certainly should *not* use cwd (since CP may be invoked from a - # variety of paths). If using tools.static, you can make your relative - # paths become absolute by supplying a value for "tools.static.root". + # variety of paths). If using tools.staticdir, you can make your relative + # paths become absolute by supplying a value for "tools.staticdir.root". if not os.path.isabs(path): - raise ValueError("'%s' is not an absolute path." % path) + msg = "'%s' is not an absolute path." 
% path
+        if debug:
+            cherrypy.log(msg, 'TOOLS.STATICFILE')
+        raise ValueError(msg)
 
     try:
         st = os.stat(path)
     except OSError:
+        if debug:
+            cherrypy.log('os.stat(%r) failed' % path, 'TOOLS.STATIC')
         raise cherrypy.NotFound()
 
     # Check if path is a directory.
     if stat.S_ISDIR(st.st_mode):
         # Let the caller deal with it as they like.
+        if debug:
+            cherrypy.log('%r is a directory' % path, 'TOOLS.STATIC')
         raise cherrypy.NotFound()
 
     # Set the Last-Modified response header, so that
     # modified-since validation code can work.
-    response.headers['Last-Modified'] = http.HTTPDate(st.st_mtime)
+    response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime)
     cptools.validate_since()
 
     if content_type is None:
@@ -57,77 +71,159 @@
         i = path.rfind('.')
         if i != -1:
             ext = path[i:].lower()
-        content_type = mimetypes.types_map.get(ext, "text/plain")
-    response.headers['Content-Type'] = content_type
+        content_type = mimetypes.types_map.get(ext, None)
+    if content_type is not None:
+        response.headers['Content-Type'] = content_type
+    if debug:
+        cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC')
+
+    cd = None
     if disposition is not None:
         if name is None:
             name = os.path.basename(path)
         cd = '%s; filename="%s"' % (disposition, name)
         response.headers["Content-Disposition"] = cd
+        if debug:
+            cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC')
 
     # Set Content-Length and use an iterable (file object)
     # this way CP won't load the whole file in memory
-    c_len = st.st_size
-    bodyfile = open(path, 'rb')
+    content_length = st.st_size
+    fileobj = open(path, 'rb')
+    return _serve_fileobj(fileobj, content_type, content_length, debug=debug)
+
+def serve_fileobj(fileobj, content_type=None, disposition=None, name=None,
+                  debug=False):
+    """Set status, headers, and body in order to serve the given file object.
+
+    The Content-Type header will be set to the content_type arg, if provided.
+
+    If disposition is not None, the Content-Disposition header will be set
+    to "<disposition>; filename=<name>". If name is None, 'filename' will
+    not be set. If disposition is None, no Content-Disposition header will
+    be written.
+
+    CAUTION: If the request contains a 'Range' header, one or more seek()s will
+    be performed on the file object. This may cause undesired behavior if
+    the file object is not seekable. It could also produce undesired results
+    if the caller set the read position of the file object prior to calling
+    serve_fileobj(), expecting that the data would be served starting from that
+    position.
+    """
+
+    response = cherrypy.serving.response
+
+    try:
+        st = os.fstat(fileobj.fileno())
+    except AttributeError:
+        if debug:
+            cherrypy.log('os has no fstat attribute', 'TOOLS.STATIC')
+        content_length = None
+    except UnsupportedOperation:
+        content_length = None
+    else:
+        # Set the Last-Modified response header, so that
+        # modified-since validation code can work.
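# A usage sketch for the serve_fileobj() defined above (the handler class and
# data are hypothetical): any readable object works, and seekable objects
# also get Range support via the seek() calls noted in the CAUTION above.

import io
import cherrypy
from cherrypy.lib import static

class Reports(object):
    def csv(self):
        buf = io.BytesIO(b"name,size\nde.po,33\nfr.po,49\n")
        return static.serve_fileobj(buf, content_type='text/csv',
                                    disposition='attachment', name='report.csv')
    csv.exposed = True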
+ response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) + cptools.validate_since() + content_length = st.st_size + + if content_type is not None: + response.headers['Content-Type'] = content_type + if debug: + cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') + + cd = None + if disposition is not None: + if name is None: + cd = disposition + else: + cd = '%s; filename="%s"' % (disposition, name) + response.headers["Content-Disposition"] = cd + if debug: + cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') + + return _serve_fileobj(fileobj, content_type, content_length, debug=debug) + +def _serve_fileobj(fileobj, content_type, content_length, debug=False): + """Internal. Set response.body to the given file object, perhaps ranged.""" + response = cherrypy.serving.response # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code - if cherrypy.request.protocol >= (1, 1): + request = cherrypy.serving.request + if request.protocol >= (1, 1): response.headers["Accept-Ranges"] = "bytes" - r = http.get_ranges(cherrypy.request.headers.get('Range'), c_len) + r = httputil.get_ranges(request.headers.get('Range'), content_length) if r == []: - response.headers['Content-Range'] = "bytes */%s" % c_len + response.headers['Content-Range'] = "bytes */%s" % content_length message = "Invalid Range (first-byte-pos greater than Content-Length)" + if debug: + cherrypy.log(message, 'TOOLS.STATIC') raise cherrypy.HTTPError(416, message) + if r: if len(r) == 1: # Return a single-part response. start, stop = r[0] - if stop > c_len: - stop = c_len + if stop > content_length: + stop = content_length r_len = stop - start + if debug: + cherrypy.log('Single part; start: %r, stop: %r' % (start, stop), + 'TOOLS.STATIC') response.status = "206 Partial Content" - response.headers['Content-Range'] = ("bytes %s-%s/%s" % - (start, stop - 1, c_len)) + response.headers['Content-Range'] = ( + "bytes %s-%s/%s" % (start, stop - 1, content_length)) response.headers['Content-Length'] = r_len - bodyfile.seek(start) - response.body = file_generator_limited(bodyfile, r_len) + fileobj.seek(start) + response.body = file_generator_limited(fileobj, r_len) else: # Return a multipart/byteranges response. response.status = "206 Partial Content" - import mimetools - boundary = mimetools.choose_boundary() + try: + # Python 3 + from email.generator import _make_boundary as choose_boundary + except ImportError: + # Python 2 + from mimetools import choose_boundary + boundary = choose_boundary() ct = "multipart/byteranges; boundary=%s" % boundary response.headers['Content-Type'] = ct - if response.headers.has_key("Content-Length"): + if "Content-Length" in response.headers: # Delete Content-Length header so finalize() recalcs it. 
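# The range math above relies on httputil.get_ranges, which yields
# (start, stop) pairs with an exclusive stop. A sketch of the semantics this
# code assumes, for a hypothetical 1000-byte resource:

from cherrypy.lib import httputil

assert httputil.get_ranges('bytes=0-99', 1000) == [(0, 100)]
assert httputil.get_ranges('bytes=-100', 1000) == [(900, 1000)]
assert httputil.get_ranges('bytes=5000-', 1000) == []  # triggers the 416 path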
del response.headers["Content-Length"] def file_ranges(): # Apache compatibility: - yield "\r\n" + yield ntob("\r\n") for start, stop in r: - yield "--" + boundary - yield "\r\nContent-type: %s" % content_type - yield ("\r\nContent-range: bytes %s-%s/%s\r\n\r\n" - % (start, stop - 1, c_len)) - bodyfile.seek(start) - for chunk in file_generator_limited(bodyfile, stop-start): + if debug: + cherrypy.log('Multipart; start: %r, stop: %r' % (start, stop), + 'TOOLS.STATIC') + yield ntob("--" + boundary, 'ascii') + yield ntob("\r\nContent-type: %s" % content_type, 'ascii') + yield ntob("\r\nContent-range: bytes %s-%s/%s\r\n\r\n" + % (start, stop - 1, content_length), 'ascii') + fileobj.seek(start) + for chunk in file_generator_limited(fileobj, stop-start): yield chunk - yield "\r\n" + yield ntob("\r\n") # Final boundary - yield "--" + boundary + "--" + yield ntob("--" + boundary + "--", 'ascii') # Apache compatibility: - yield "\r\n" + yield ntob("\r\n") response.body = file_ranges() + return response.body else: - response.headers['Content-Length'] = c_len - response.body = bodyfile - else: - response.headers['Content-Length'] = c_len - response.body = bodyfile + if debug: + cherrypy.log('No byteranges requested', 'TOOLS.STATIC') + + # Set Content-Length and use an iterable (file object) + # this way CP won't load the whole file in memory + response.headers['Content-Length'] = content_length + response.body = fileobj return response.body def serve_download(path, name=None): @@ -136,7 +232,10 @@ def serve_download(path, name=None): return serve_file(path, "application/x-download", "attachment", name) -def _attempt(filename, content_types): +def _attempt(filename, content_types, debug=False): + if debug: + cherrypy.log('Attempting %r (content_types %r)' % + (filename, content_types), 'TOOLS.STATICDIR') try: # you can set the content types for a # complete directory per extension @@ -144,33 +243,45 @@ def _attempt(filename, content_types): if content_types: r, ext = os.path.splitext(filename) content_type = content_types.get(ext[1:], None) - serve_file(filename, content_type=content_type) + serve_file(filename, content_type=content_type, debug=debug) return True except cherrypy.NotFound: # If we didn't find the static file, continue handling the # request. We might find a dynamic handler instead. + if debug: + cherrypy.log('NotFound', 'TOOLS.STATICFILE') return False -def staticdir(section, dir, root="", match="", content_types=None, index=""): +def staticdir(section, dir, root="", match="", content_types=None, index="", + debug=False): """Serve a static resource from the given (root +) dir. - If 'match' is given, request.path_info will be searched for the given - regular expression before attempting to serve static content. + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. - If content_types is given, it should be a Python dictionary of - {file-extension: content-type} pairs, where 'file-extension' is - a string (e.g. "gif") and 'content-type' is the value to write - out in the Content-Type response header (e.g. "image/gif"). + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). - If 'index' is provided, it should be the (relative) name of a file to - serve for directory requests. 
For example, if the dir argument is - '/home/me', the Request-URI is 'myapp', and the index arg is - 'index.html', the file '/home/me/myapp/index.html' will be sought. + index + If provided, it should be the (relative) name of a file to + serve for directory requests. For example, if the dir argument is + '/home/me', the Request-URI is 'myapp', and the index arg is + 'index.html', the file '/home/me/myapp/index.html' will be sought. """ - if cherrypy.request.method not in ('GET', 'HEAD'): + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICDIR') return False - if match and not re.search(match, cherrypy.request.path_info): + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICDIR') return False # Allow the use of '~' to refer to a user's home directory. @@ -180,6 +291,8 @@ def staticdir(section, dir, root="", match="", content_types=None, index=""): if not os.path.isabs(dir): if not root: msg = "Static dir requires an absolute dir (or root)." + if debug: + cherrypy.log(msg, 'TOOLS.STATICDIR') raise ValueError(msg) dir = os.path.join(root, dir) @@ -188,11 +301,14 @@ def staticdir(section, dir, root="", match="", content_types=None, index=""): if section == 'global': section = "/" section = section.rstrip(r"\/") - branch = cherrypy.request.path_info[len(section) + 1:] - branch = urllib.unquote(branch.lstrip(r"\/")) + branch = request.path_info[len(section) + 1:] + branch = unquote(branch.lstrip(r"\/")) # If branch is "", filename will end in a slash filename = os.path.join(dir, branch) + if debug: + cherrypy.log('Checking file %r to fulfill %r' % + (filename, request.path_info), 'TOOLS.STATICDIR') # There's a chance that the branch pulled from the URL might # have ".." or similar uplevel attacks in it. Check that the final @@ -206,31 +322,42 @@ def staticdir(section, dir, root="", match="", content_types=None, index=""): if index: handled = _attempt(os.path.join(filename, index), content_types) if handled: - cherrypy.request.is_index = filename[-1] in (r"\/") + request.is_index = filename[-1] in (r"\/") return handled -def staticfile(filename, root=None, match="", content_types=None): +def staticfile(filename, root=None, match="", content_types=None, debug=False): """Serve a static resource from the given (root +) filename. - If 'match' is given, request.path_info will be searched for the given - regular expression before attempting to serve static content. + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). - If content_types is given, it should be a Python dictionary of - {file-extension: content-type} pairs, where 'file-extension' is - a string (e.g. "gif") and 'content-type' is the value to write - out in the Content-Type response header (e.g. "image/gif"). 
""" - if cherrypy.request.method not in ('GET', 'HEAD'): + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICFILE') return False - if match and not re.search(match, cherrypy.request.path_info): + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICFILE') return False # If filename is relative, make absolute using "root". if not os.path.isabs(filename): if not root: msg = "Static tool requires an absolute filename (got '%s')." % filename + if debug: + cherrypy.log(msg, 'TOOLS.STATICFILE') raise ValueError(msg) filename = os.path.join(root, filename) - return _attempt(filename, content_types) + return _attempt(filename, content_types, debug=debug) diff --git a/src/cherrypy/lib/tidy.py b/src/cherrypy/lib/tidy.py deleted file mode 100644 index ed337c6a1e..0000000000 --- a/src/cherrypy/lib/tidy.py +++ /dev/null @@ -1,184 +0,0 @@ -"""Functions to run cherrypy.response through Tidy or NSGML.""" - -import cgi -import os -import StringIO -import traceback - -import cherrypy - -def tidy(temp_dir, tidy_path, strict_xml=False, errors_to_ignore=None, - indent=False, wrap=False, warnings=True): - """Run cherrypy.response through Tidy. - - If either 'indent' or 'wrap' are specified, then response.body will be - set to the output of tidy. Otherwise, only errors (including warnings, - if warnings is True) will change the body. - - Note that we use the standalone Tidy tool rather than the python - mxTidy module. This is because this module does not seem to be - stable and it crashes on some HTML pages (which means that the - server would also crash) - """ - response = cherrypy.response - - # the tidy tool, by its very nature it's not generator friendly, - # so we just collapse the body and work with it. - orig_body = response.collapse_body() - - fct = response.headers.get('Content-Type', '') - ct = fct.split(';')[0] - encoding = '' - i = fct.find('charset=') - if i != -1: - encoding = fct[i + 8:] - - if ct == 'text/html': - page_file = os.path.join(temp_dir, 'page.html') - open(page_file, 'wb').write(orig_body) - - out_file = os.path.join(temp_dir, 'tidy.out') - err_file = os.path.join(temp_dir, 'tidy.err') - tidy_enc = encoding.replace('-', '') - if tidy_enc: - tidy_enc = '-' + tidy_enc - - strict_xml = ("", " -xml")[bool(strict_xml)] - - if indent: - indent = ' -indent' - else: - indent = '' - - if wrap is False: - wrap = '' - else: - try: - wrap = ' -wrap %d' % int(tidyWrap) - except: - wrap = '' - - result = os.system('"%s" %s%s%s%s -f %s -o %s %s' % - (tidy_path, tidy_enc, strict_xml, indent, wrap, - err_file, out_file, page_file)) - use_output = bool(indent or wrap) and not result - if use_output: - output = open(out_file, 'rb').read() - - new_errs = [] - for err in open(err_file, 'rb').read().splitlines(): - if (err.find('Error') != -1 or - (warnings and err.find('Warning') != -1)): - ignore = 0 - for err_ign in errors_to_ignore or []: - if err.find(err_ign) != -1: - ignore = 1 - break - if not ignore: - new_errs.append(err) - - if new_errs: - response.body = wrong_content('
<br />'.join(new_errs), orig_body)
-            if response.headers.has_key("Content-Length"):
-                # Delete Content-Length header so finalize() recalcs it.
-                del response.headers["Content-Length"]
-            return
-        elif strict_xml:
-            # The HTML is OK, but is it valid XML?
-            # Use elementtree to parse XML
-            from elementtree.ElementTree import parse
-            tag_list = ['nbsp', 'quot']
-            for tag in tag_list:
-                orig_body = orig_body.replace('&' + tag + ';', tag.upper())
-
-            if encoding:
-                enctag = '<?xml version="1.0" encoding="%s"?>' % encoding
-                orig_body = enctag + orig_body
-
-            f = StringIO.StringIO(orig_body)
-            try:
-                tree = parse(f)
-            except:
-                # Wrong XML
-                body_file = StringIO.StringIO()
-                traceback.print_exc(file = body_file)
-                body_file = '
<br />'.join(body_file.getvalue())
-                response.body = wrong_content(body_file, orig_body, "XML")
-                if response.headers.has_key("Content-Length"):
-                    # Delete Content-Length header so finalize() recalcs it.
-                    del response.headers["Content-Length"]
-                return
-
-    if use_output:
-        response.body = [output]
-        if response.headers.has_key("Content-Length"):
-            # Delete Content-Length header so finalize() recalcs it.
-            del response.headers["Content-Length"]
-
-def html_space(text):
-    """Escape text, replacing space with nbsp and tab with 4 nbsp's."""
-    return cgi.escape(text).replace('\t', '&nbsp;' * 4).replace(' ', '&nbsp;')
-
-def html_break(text):
-    """Escape text, replacing newline with HTML br element."""
-    return cgi.escape(text).replace('\n', '
<br />')

-def wrong_content(header, body, content_type="HTML"):
-    output = ["Wrong %s:<br />%s<br />" % (content_type, html_break(header))]
-    for i, line in enumerate(body.splitlines()):
-        output.append("%03d - %s" % (i + 1, html_space(line)))
-    return "
    ".join(output) - - -def nsgmls(temp_dir, nsgmls_path, catalog_path, errors_to_ignore=None): - response = cherrypy.response - - # the tidy tool, by its very nature it's not generator friendly, - # so we just collect the body and work with it. - orig_body = response.collapse_body() - - fct = response.headers.get('Content-Type', '') - ct = fct.split(';')[0] - encoding = '' - i = fct.find('charset=') - if i != -1: - encoding = fct[i + 8:] - if ct == 'text/html': - # Remove bits of Javascript (nsgmls doesn't seem to handle - # them correctly (for instance, if ', i) - if j == -1: - break - orig_body = orig_body[:i] + orig_body[j+9:] - - page_file = os.path.join(temp_dir, 'page.html') - open(page_file, 'wb').write(orig_body) - - err_file = os.path.join(temp_dir, 'nsgmls.err') - command = ('%s -c%s -f%s -s -E10 %s' % - (nsgmls_path, catalog_path, err_file, page_file)) - command = command.replace('\\', '/') - os.system(command) - errs = open(err_file, 'rb').read() - - new_errs = [] - for err in errs.splitlines(): - ignore = False - for err_ign in errors_to_ignore or []: - if err.find(err_ign) != -1: - ignore = True - break - if not ignore: - new_errs.append(err) - - if new_errs: - response.body = wrong_content('
    '.join(new_errs), orig_body) - if response.headers.has_key("Content-Length"): - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - diff --git a/src/cherrypy/lib/wsgiapp.py b/src/cherrypy/lib/wsgiapp.py deleted file mode 100644 index 8aeb5755d9..0000000000 --- a/src/cherrypy/lib/wsgiapp.py +++ /dev/null @@ -1,77 +0,0 @@ -"""A CherryPy tool for hosting a foreign WSGI application.""" - -import sys -import warnings - -import cherrypy - - -# is this sufficient for start_response? -def start_response(status, response_headers, exc_info=None): - cherrypy.response.status = status - headers_dict = dict(response_headers) - cherrypy.response.headers.update(headers_dict) - -def make_environ(): - """grabbed some of below from wsgiserver.py - - for hosting WSGI apps in non-WSGI environments (yikes!) - """ - - request = cherrypy.request - - # create and populate the wsgi environ - environ = dict() - environ["wsgi.version"] = (1,0) - environ["wsgi.url_scheme"] = request.scheme - environ["wsgi.input"] = request.rfile - environ["wsgi.errors"] = sys.stderr - environ["wsgi.multithread"] = True - environ["wsgi.multiprocess"] = False - environ["wsgi.run_once"] = False - environ["REQUEST_METHOD"] = request.method - environ["SCRIPT_NAME"] = request.script_name - environ["PATH_INFO"] = request.path_info - environ["QUERY_STRING"] = request.query_string - environ["SERVER_PROTOCOL"] = request.protocol - environ["SERVER_NAME"] = request.local.name - environ["SERVER_PORT"] = request.local.port - environ["REMOTE_HOST"] = request.remote.name - environ["REMOTE_ADDR"] = request.remote.ip - environ["REMOTE_PORT"] = request.remote.port - # then all the http headers - headers = request.headers - environ["CONTENT_TYPE"] = headers.get("Content-type", "") - environ["CONTENT_LENGTH"] = headers.get("Content-length", "") - for (k, v) in headers.iteritems(): - envname = "HTTP_" + k.upper().replace("-","_") - environ[envname] = v - return environ - - -def run(app, env=None): - """Run the given WSGI app and set response.body to its output.""" - warnings.warn("This module is deprecated and will be removed in " - "Cherrypy 3.2. 
See http://www.cherrypy.org/ticket/700 " - "for more information.") - - try: - environ = cherrypy.request.wsgi_environ.copy() - environ['SCRIPT_NAME'] = cherrypy.request.script_name - environ['PATH_INFO'] = cherrypy.request.path_info - except AttributeError: - environ = make_environ() - - if env: - environ.update(env) - - # run the wsgi app and have it set response.body - response = app(environ, start_response) - try: - cherrypy.response.body = [x for x in response] - finally: - if hasattr(response, "close"): - response.close() - - return True - diff --git a/src/cherrypy/lib/xmlrpc.py b/src/cherrypy/lib/xmlrpcutil.py similarity index 78% rename from src/cherrypy/lib/xmlrpc.py rename to src/cherrypy/lib/xmlrpcutil.py index 59ee0278fe..9a44464bc0 100644 --- a/src/cherrypy/lib/xmlrpc.py +++ b/src/cherrypy/lib/xmlrpcutil.py @@ -1,13 +1,19 @@ import sys import cherrypy +from cherrypy._cpcompat import ntob +def get_xmlrpclib(): + try: + import xmlrpc.client as x + except ImportError: + import xmlrpclib as x + return x def process_body(): """Return (params, method) from request body.""" try: - import xmlrpclib - return xmlrpclib.loads(cherrypy.request.body.read()) + return get_xmlrpclib().loads(cherrypy.request.body.read()) except Exception: return ('ERROR PARAMS', ), 'ERRORMETHOD' @@ -29,13 +35,13 @@ def _set_response(body): # as a "Protocol Error", we'll just return 200 every time. response = cherrypy.response response.status = '200 OK' - response.body = body + response.body = ntob(body, 'utf-8') response.headers['Content-Type'] = 'text/xml' response.headers['Content-Length'] = len(body) def respond(body, encoding='utf-8', allow_none=0): - import xmlrpclib + xmlrpclib = get_xmlrpclib() if not isinstance(body, xmlrpclib.Fault): body = (body,) _set_response(xmlrpclib.dumps(body, methodresponse=1, @@ -44,6 +50,6 @@ def respond(body, encoding='utf-8', allow_none=0): def on_error(*args, **kwargs): body = str(sys.exc_info()[1]) - import xmlrpclib + xmlrpclib = get_xmlrpclib() _set_response(xmlrpclib.dumps(xmlrpclib.Fault(1, body))) diff --git a/src/cherrypy/process/plugins.py b/src/cherrypy/process/plugins.py index 0e8b4bf919..ba618a0bd0 100644 --- a/src/cherrypy/process/plugins.py +++ b/src/cherrypy/process/plugins.py @@ -2,19 +2,38 @@ import os import re -try: - set -except NameError: - from sets import Set as set import signal as _signal import sys import time import threading +from cherrypy._cpcompat import basestring, get_daemon, get_thread_ident, ntob, set + +# _module__file__base is used by Autoreload to make +# absolute any filenames retrieved from sys.modules which are not +# already absolute paths. This is to work around Python's quirk +# of importing the startup script and using a relative filename +# for it in sys.modules. +# +# Autoreload examines sys.modules afresh every time it runs. If an application +# changes the current directory by executing os.chdir(), then the next time +# Autoreload runs, it will not be able to find any filenames which are +# not absolute paths, because the current directory is not the same as when the +# module was first imported. Autoreload will then wrongly conclude the file has +# "changed", and initiate the shutdown/re-exec sequence. +# See ticket #917. +# For this workaround to have a decent probability of success, this module +# needs to be imported as early as possible, before the app has much chance +# to change the working directory. 
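# A small sketch of the normalization this base directory enables (the helper
# name is hypothetical): anchoring relative module filenames to the startup
# cwd keeps autoreload stable across an os.chdir() in the application.

import os

def absolutize(filename, base):
    """Anchor a possibly-relative module filename to the startup cwd."""
    if not os.path.isabs(filename):
        filename = os.path.normpath(os.path.join(base, filename))
    return filename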
+_module__file__base = os.getcwd() + class SimplePlugin(object): """Plugin base class which auto-subscribes methods for known channels.""" + bus = None + """A :class:`Bus `, usually cherrypy.engine.""" + def __init__(self, bus): self.bus = bus @@ -39,16 +58,33 @@ class SimplePlugin(object): class SignalHandler(object): """Register bus channels (and listeners) for system signals. - By default, instantiating this object subscribes the following signals - and listeners: + You can modify what signals your application listens for, and what it does + when it receives signals, by modifying :attr:`SignalHandler.handlers`, + a dict of {signal name: callback} pairs. The default set is:: - TERM: bus.exit - HUP : bus.restart - USR1: bus.graceful + handlers = {'SIGTERM': self.bus.exit, + 'SIGHUP': self.handle_SIGHUP, + 'SIGUSR1': self.bus.graceful, + } + + The :func:`SignalHandler.handle_SIGHUP`` method calls + :func:`bus.restart()` + if the process is daemonized, but + :func:`bus.exit()` + if the process is attached to a TTY. This is because Unix window + managers tend to send SIGHUP to terminal windows when the user closes them. + + Feel free to add signals which are not available on every platform. The + :class:`SignalHandler` will ignore errors raised from attempting to register + handlers for unknown signals. """ - # Map from signal numbers to names + handlers = {} + """A map from signal names (e.g. 'SIGTERM') to handlers (e.g. bus.exit).""" + signals = {} + """A map from signal numbers to names.""" + for k, v in vars(_signal).items(): if k.startswith('SIG') and not k.startswith('SIG_'): signals[v] = k @@ -61,18 +97,32 @@ class SignalHandler(object): 'SIGHUP': self.handle_SIGHUP, 'SIGUSR1': self.bus.graceful, } - + + if sys.platform[:4] == 'java': + del self.handlers['SIGUSR1'] + self.handlers['SIGUSR2'] = self.bus.graceful + self.bus.log("SIGUSR1 cannot be set on the JVM platform. " + "Using SIGUSR2 instead.") + self.handlers['SIGINT'] = self._jython_SIGINT_handler + self._previous_handlers = {} + def _jython_SIGINT_handler(self, signum=None, frame=None): + # See http://bugs.jython.org/issue1313 + self.bus.log('Keyboard Interrupt: shutting down bus') + self.bus.exit() + def subscribe(self): - for sig, func in self.handlers.iteritems(): + """Subscribe self.handlers to signals.""" + for sig, func in self.handlers.items(): try: self.set_handler(sig, func) except ValueError: pass def unsubscribe(self): - for signum, handler in self._previous_handlers.iteritems(): + """Unsubscribe self.handlers from signals.""" + for signum, handler in self._previous_handlers.items(): signame = self.signals[signum] if handler is None: @@ -126,6 +176,7 @@ class SignalHandler(object): self.bus.publish(signame) def handle_SIGHUP(self): + """Restart if daemonized, else exit.""" if os.isatty(sys.stdin.fileno()): # not daemonized (may be foreground or background) self.bus.log("SIGHUP caught but not daemonized. Exiting.") @@ -165,7 +216,8 @@ class DropPrivileges(SimplePlugin): elif isinstance(val, basestring): val = pwd.getpwnam(val)[2] self._uid = val - uid = property(_get_uid, _set_uid, doc="The uid under which to run.") + uid = property(_get_uid, _set_uid, + doc="The uid under which to run. Availability: Unix.") def _get_gid(self): return self._gid @@ -178,7 +230,8 @@ class DropPrivileges(SimplePlugin): elif isinstance(val, basestring): val = grp.getgrnam(val)[2] self._gid = val - gid = property(_get_gid, _set_gid, doc="The gid under which to run.") + gid = property(_get_gid, _set_gid, + doc="The gid under which to run. 
Availability: Unix.") def _get_umask(self): return self._umask @@ -191,7 +244,12 @@ class DropPrivileges(SimplePlugin): level=30) val = None self._umask = val - umask = property(_get_umask, _set_umask, doc="The umask under which to run.") + umask = property(_get_umask, _set_umask, + doc="""The default permission mode for newly created files and directories. + + Usually expressed in octal format, for example, ``0644``. + Availability: Unix, Windows. + """) def start(self): # uid/gid @@ -216,6 +274,7 @@ class DropPrivileges(SimplePlugin): self.bus.log('Started as uid: %r gid: %r' % current_ids()) if self.gid is not None: os.setgid(self.gid) + os.setgroups([]) if self.uid is not None: os.setuid(self.uid) self.bus.log('Running as uid: %r gid: %r' % current_ids()) @@ -242,8 +301,8 @@ class DropPrivileges(SimplePlugin): class Daemonizer(SimplePlugin): """Daemonize the running script. - Use this with a Web Site Process Bus via: - + Use this with a Web Site Process Bus via:: + Daemonizer(bus).subscribe() When this component finishes, the process is completely decoupled from @@ -296,8 +355,9 @@ class Daemonizer(SimplePlugin): # This is the first parent. Exit, now that we've forked. self.bus.log('Forking once.') os._exit(0) - except OSError, exc: + except OSError: # Python raises OSError rather than returning negative numbers. + exc = sys.exc_info()[1] sys.exit("%s: fork #1 failed: (%d) %s\n" % (sys.argv[0], exc.errno, exc.strerror)) @@ -309,7 +369,8 @@ class Daemonizer(SimplePlugin): if pid > 0: self.bus.log('Forking twice.') os._exit(0) # Exit second parent - except OSError, exc: + except OSError: + exc = sys.exc_info()[1] sys.exit("%s: fork #2 failed: (%d) %s\n" % (sys.argv[0], exc.errno, exc.strerror)) @@ -318,7 +379,7 @@ class Daemonizer(SimplePlugin): si = open(self.stdin, "r") so = open(self.stdout, "a+") - se = open(self.stderr, "a+", 0) + se = open(self.stderr, "a+") # os.dup2(fd, fd2) will close fd2 if necessary, # so we don't explicitly close stdin/out/err. @@ -345,7 +406,7 @@ class PIDFile(SimplePlugin): if self.finalized: self.bus.log('PID %r already written to %r.' % (pid, self.pidfile)) else: - open(self.pidfile, "wb").write(str(pid)) + open(self.pidfile, "wb").write(ntob("%s" % pid, 'utf8')) self.bus.log('PID %r written to %r.' % (pid, self.pidfile)) self.finalized = True start.priority = 70 @@ -361,38 +422,94 @@ class PIDFile(SimplePlugin): class PerpetualTimer(threading._Timer): - """A subclass of threading._Timer whose run() method repeats.""" + """A responsive subclass of threading._Timer whose run() method repeats. + + Use this timer only when you really need a very interruptible timer; + this checks its 'finished' condition up to 20 times a second, which can + results in pretty high CPU usage + """ def run(self): while True: self.finished.wait(self.interval) if self.finished.isSet(): return - self.function(*self.args, **self.kwargs) + try: + self.function(*self.args, **self.kwargs) + except Exception: + self.bus.log("Error in perpetual timer thread function %r." % + self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + +class BackgroundTask(threading.Thread): + """A subclass of threading.Thread whose run() method repeats. + + Use this class for most repeating tasks. It uses time.sleep() to wait + for each interval, which isn't very responsive; that is, even if you call + self.cancel(), you'll have to wait until the sleep() call finishes before + the thread stops. 
To compensate, it defaults to being daemonic, which means + it won't delay stopping the whole process. + """ + + def __init__(self, interval, function, args=[], kwargs={}, bus=None): + threading.Thread.__init__(self) + self.interval = interval + self.function = function + self.args = args + self.kwargs = kwargs + self.running = False + self.bus = bus + + def cancel(self): + self.running = False + + def run(self): + self.running = True + while self.running: + time.sleep(self.interval) + if not self.running: + return + try: + self.function(*self.args, **self.kwargs) + except Exception: + if self.bus: + self.bus.log("Error in background task thread function %r." + % self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + def _set_daemon(self): + return True class Monitor(SimplePlugin): - """WSPBus listener to periodically run a callback in its own thread. + """WSPBus listener to periodically run a callback in its own thread.""" - bus: a Web Site Process Bus object. - callback: the function to call at intervals. - frequency: the time in seconds between callback runs. - """ + callback = None + """The function to call at intervals.""" frequency = 60 + """The time in seconds between callback runs.""" - def __init__(self, bus, callback, frequency=60): + thread = None + """A :class:`BackgroundTask` thread.""" + + def __init__(self, bus, callback, frequency=60, name=None): SimplePlugin.__init__(self, bus) self.callback = callback self.frequency = frequency self.thread = None + self.name = name def start(self): - """Start our callback in its own perpetual timer thread.""" + """Start our callback in its own background thread.""" if self.frequency > 0: - threadname = self.__class__.__name__ + threadname = self.name or self.__class__.__name__ if self.thread is None: - self.thread = PerpetualTimer(self.frequency, self.callback) + self.thread = BackgroundTask(self.frequency, self.callback, + bus = self.bus) self.thread.setName(threadname) self.thread.start() self.bus.log("Started monitor thread %r." % threadname) @@ -401,28 +518,54 @@ class Monitor(SimplePlugin): start.priority = 70 def stop(self): - """Stop our callback's perpetual timer thread.""" + """Stop our callback's background task thread.""" if self.thread is None: - self.bus.log("No thread running for %s." % self.__class__.__name__) + self.bus.log("No thread running for %s." % self.name or self.__class__.__name__) else: if self.thread is not threading.currentThread(): name = self.thread.getName() self.thread.cancel() - self.thread.join() + if not get_daemon(self.thread): + self.bus.log("Joining %r" % name) + self.thread.join() self.bus.log("Stopped thread %r." % name) self.thread = None def graceful(self): - """Stop the callback's perpetual timer thread and restart it.""" + """Stop the callback's background task thread and restart it.""" self.stop() self.start() class Autoreloader(Monitor): - """Monitor which re-executes the process when files change.""" + """Monitor which re-executes the process when files change. + + This :ref:`plugin` restarts the process (via :func:`os.execv`) + if any of the files it monitors change (or is deleted). By default, the + autoreloader monitors all imported modules; you can add to the + set by adding to ``autoreload.files``:: + + cherrypy.engine.autoreload.files.add(myFile) + + If there are imported files you do *not* wish to monitor, you can adjust the + ``match`` attribute, a regular expression. 
For example, to stop monitoring + cherrypy itself:: + + cherrypy.engine.autoreload.match = r'^(?!cherrypy).+' + + Like all :class:`Monitor` plugins, + the autoreload plugin takes a ``frequency`` argument. The default is + 1 second; that is, the autoreloader will examine files once each second. + """ + + files = None + """The set of files to poll for modifications.""" frequency = 1 + """The interval in seconds at which to poll for modified files.""" + match = '.*' + """A regular expression by which to match filenames.""" def __init__(self, bus, frequency=1, match='.*'): self.mtimes = {} @@ -431,24 +574,30 @@ class Autoreloader(Monitor): Monitor.__init__(self, bus, self.run, frequency) def start(self): - """Start our own perpetual timer thread for self.run.""" + """Start our own background task thread for self.run.""" if self.thread is None: self.mtimes = {} Monitor.start(self) start.priority = 70 - def run(self): - """Reload the process if registered files have been modified.""" - sysfiles = set() + def sysfiles(self): + """Return a Set of sys.modules filenames to monitor.""" + files = set() for k, m in sys.modules.items(): if re.match(self.match, k): - if hasattr(m, '__loader__'): - if hasattr(m.__loader__, 'archive'): - k = m.__loader__.archive - k = getattr(m, '__file__', None) - sysfiles.add(k) - - for filename in sysfiles | self.files: + if hasattr(m, '__loader__') and hasattr(m.__loader__, 'archive'): + f = m.__loader__.archive + else: + f = getattr(m, '__file__', None) + if f is not None and not os.path.isabs(f): + # ensure absolute paths so a os.chdir() in the app doesn't break me + f = os.path.normpath(os.path.join(_module__file__base, f)) + files.add(f) + return files + + def run(self): + """Reload the process if registered files have been modified.""" + for filename in self.sysfiles() | self.files: if filename: if filename.endswith('.pyc'): filename = filename[:-1] @@ -493,21 +642,26 @@ class ThreadManager(SimplePlugin): 'stop_thread' listeners for you when it stops. """ + threads = None + """A map of {thread ident: index number} pairs.""" + def __init__(self, bus): self.threads = {} SimplePlugin.__init__(self, bus) self.bus.listeners.setdefault('acquire_thread', set()) + self.bus.listeners.setdefault('start_thread', set()) self.bus.listeners.setdefault('release_thread', set()) - + self.bus.listeners.setdefault('stop_thread', set()) + def acquire_thread(self): """Run 'start_thread' listeners for the current thread. If the current thread has already been seen, any 'start_thread' listeners will not be run again. """ - thread_ident = threading._get_ident() + thread_ident = get_thread_ident() if thread_ident not in self.threads: - # We can't just use _get_ident as the thread ID + # We can't just use get_ident as the thread ID # because some platforms reuse thread ID's. 
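# A standalone sketch of this bookkeeping (an assumed equivalent, not the
# class itself): the first time an ident is seen it gets the next small
# serial number, which is what 'start_thread'/'stop_thread' listeners receive.

_seen = {}

def serial_for(ident):
    if ident not in _seen:
        _seen[ident] = len(_seen) + 1
    return _seen[ident]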
i = len(self.threads) + 1 self.threads[thread_ident] = i @@ -515,14 +669,14 @@ class ThreadManager(SimplePlugin): def release_thread(self): """Release the current thread and run 'stop_thread' listeners.""" - thread_ident = threading._get_ident() + thread_ident = get_thread_ident() i = self.threads.pop(thread_ident, None) if i is not None: self.bus.publish('stop_thread', i) def stop(self): """Release all threads and run all 'stop_thread' listeners.""" - for thread_ident, i in self.threads.iteritems(): + for thread_ident, i in self.threads.items(): self.bus.publish('stop_thread', i) self.threads.clear() graceful = stop diff --git a/src/cherrypy/process/servers.py b/src/cherrypy/process/servers.py index da469bfad2..456da1e5f5 100644 --- a/src/cherrypy/process/servers.py +++ b/src/cherrypy/process/servers.py @@ -1,5 +1,117 @@ -"""Adapt an HTTP server.""" +""" +Starting in CherryPy 3.1, cherrypy.server is implemented as an +:ref:`Engine Plugin`. It's an instance of +:class:`cherrypy._cpserver.Server`, which is a subclass of +:class:`cherrypy.process.servers.ServerAdapter`. The ``ServerAdapter`` class +is designed to control other servers, as well. +Multiple servers/ports +====================== + +If you need to start more than one HTTP server (to serve on multiple ports, or +protocols, etc.), you can manually register each one and then start them all +with engine.start:: + + s1 = ServerAdapter(cherrypy.engine, MyWSGIServer(host='0.0.0.0', port=80)) + s2 = ServerAdapter(cherrypy.engine, another.HTTPServer(host='127.0.0.1', SSL=True)) + s1.subscribe() + s2.subscribe() + cherrypy.engine.start() + +.. index:: SCGI + +FastCGI/SCGI +============ + +There are also Flup\ **F**\ CGIServer and Flup\ **S**\ CGIServer classes in +:mod:`cherrypy.process.servers`. To start an fcgi server, for example, +wrap an instance of it in a ServerAdapter:: + + addr = ('0.0.0.0', 4000) + f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=addr) + s = servers.ServerAdapter(cherrypy.engine, httpserver=f, bind_addr=addr) + s.subscribe() + +The :doc:`cherryd` startup script will do the above for +you via its `-f` flag. +Note that you need to download and install `flup `_ +yourself, whether you use ``cherryd`` or not. + +.. _fastcgi: +.. index:: FastCGI + +FastCGI +------- + +A very simple setup lets your cherry run with FastCGI. +You just need the flup library, +plus a running Apache server (with ``mod_fastcgi``) or lighttpd server. + +CherryPy code +^^^^^^^^^^^^^ + +hello.py:: + + #!/usr/bin/python + import cherrypy + + class HelloWorld: + \"""Sample request handler class.\""" + def index(self): + return "Hello world!" + index.exposed = True + + cherrypy.tree.mount(HelloWorld()) + # CherryPy autoreload must be disabled for the flup server to work + cherrypy.config.update({'engine.autoreload_on':False}) + +Then run :doc:`/deployguide/cherryd` with the '-f' arg:: + + cherryd -c -d -f -i hello.py + +Apache +^^^^^^ + +At the top level in httpd.conf:: + + FastCgiIpcDir /tmp + FastCgiServer /path/to/cherry.fcgi -idle-timeout 120 -processes 4 + +And inside the relevant VirtualHost section:: + + # FastCGI config + AddHandler fastcgi-script .fcgi + ScriptAliasMatch (.*$) /path/to/cherry.fcgi$1 + +Lighttpd +^^^^^^^^ + +For `Lighttpd `_ you can follow these +instructions. Within ``lighttpd.conf`` make sure ``mod_fastcgi`` is +active within ``server.modules``. 
Then, within your ``$HTTP["host"]`` +directive, configure your fastcgi script like the following:: + + $HTTP["url"] =~ "" { + fastcgi.server = ( + "/" => ( + "script.fcgi" => ( + "bin-path" => "/path/to/your/script.fcgi", + "socket" => "/tmp/script.sock", + "check-local" => "disable", + "disable-time" => 1, + "min-procs" => 1, + "max-procs" => 1, # adjust as needed + ), + ), + ) + } # end of $HTTP["url"] =~ "^/" + +Please see `Lighttpd FastCGI Docs +`_ for an explanation +of the possible configuration options. +""" + +import sys import time @@ -34,7 +146,9 @@ class ServerAdapter(object): def start(self): """Start the HTTP server.""" - if isinstance(self.bind_addr, tuple): + if self.bind_addr is None: + on_what = "unknown interface (dynamic?)" + elif isinstance(self.bind_addr, tuple): host, port = self.bind_addr on_what = "%s:%s" % (host, port) else: @@ -71,17 +185,16 @@ class ServerAdapter(object): """ try: self.httpserver.start() - except KeyboardInterrupt, exc: + except KeyboardInterrupt: self.bus.log(" hit: shutting down HTTP server") - self.interrupt = exc + self.interrupt = sys.exc_info()[1] self.bus.exit() - except SystemExit, exc: + except SystemExit: self.bus.log("SystemExit raised: shutting down HTTP server") - self.interrupt = exc + self.interrupt = sys.exc_info()[1] self.bus.exit() raise except: - import sys self.interrupt = sys.exc_info()[1] self.bus.log("Error in HTTP server: shutting down", traceback=True, level=40) @@ -120,10 +233,40 @@ class ServerAdapter(object): self.start() +class FlupCGIServer(object): + """Adapter for a flup.server.cgi.WSGIServer.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + self.ready = False + + def start(self): + """Start the CGI server.""" + # We have to instantiate the server class here because its __init__ + # starts a threadpool. If we do it too early, daemonize won't work. + from flup.server.cgi import WSGIServer + + self.cgiserver = WSGIServer(*self.args, **self.kwargs) + self.ready = True + self.cgiserver.run() + + def stop(self): + """Stop the HTTP server.""" + self.ready = False + + class FlupFCGIServer(object): """Adapter for a flup.server.fcgi.WSGIServer.""" def __init__(self, *args, **kwargs): + if kwargs.get('bindAddress', None) is None: + import socket + if not hasattr(socket, 'fromfd'): + raise ValueError( + 'Dynamic FCGI server not available on this platform. ' + 'You must use a static or external one by providing a ' + 'legal bindAddress.') self.args = args self.kwargs = kwargs self.ready = False @@ -199,8 +342,9 @@ def client_host(server_host): if server_host == '0.0.0.0': # 0.0.0.0 is INADDR_ANY, which should answer on localhost. return '127.0.0.1' - if server_host == '::': + if server_host in ('::', '::0', '::0.0.0.0'): # :: is IN6ADDR_ANY, which should answer on localhost. + # ::0 and ::0.0.0.0 are non-canonical but common ways to write IN6ADDR_ANY. 
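# How client_host is typically paired with the port checks below during
# startup (the wrapper and its values are made up; check_port raises IOError
# when the port is already bound):

def assert_free(server_host, port):
    host = client_host(server_host)  # e.g. '0.0.0.0' -> '127.0.0.1'
    check_port(host, port, timeout=1.0)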
return '::1' return server_host @@ -215,8 +359,16 @@ def check_port(host, port, timeout=1.0): # AF_INET or AF_INET6 socket # Get the correct address family for our host (allows IPv6 addresses) - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM) + except socket.gaierror: + if ':' in host: + info = [(socket.AF_INET6, socket.SOCK_STREAM, 0, "", (host, port, 0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", (host, port))] + + for res in info: af, socktype, proto, canonname, sa = res s = None try: @@ -233,34 +385,43 @@ def check_port(host, port, timeout=1.0): if s: s.close() -def wait_for_free_port(host, port): + +# Feel free to increase these defaults on slow systems: +free_port_timeout = 0.2 # Changed by Kovid +occupied_port_timeout = 1.0 + +def wait_for_free_port(host, port, timeout=None): """Wait for the specified port to become free (drop requests).""" if not host: raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = free_port_timeout - for trial in xrange(50): + for trial in range(50): try: # we are expecting a free port, so reduce the timeout - check_port(host, port, timeout=0.2) # Changed by Kovid + check_port(host, port, timeout=timeout) except IOError: # Give the old server thread time to free the port. - time.sleep(0.2) # Changed by Kovid + time.sleep(timeout) else: return raise IOError("Port %r not free on %r" % (port, host)) -def wait_for_occupied_port(host, port): +def wait_for_occupied_port(host, port, timeout=None): """Wait for the specified port to become active (receive requests).""" if not host: raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = occupied_port_timeout - for trial in xrange(50): + for trial in range(50): try: - check_port(host, port) + check_port(host, port, timeout=timeout) except IOError: return else: - time.sleep(.1) + time.sleep(timeout) raise IOError("Port %r not bound on %r" % (port, host)) diff --git a/src/cherrypy/process/win32.py b/src/cherrypy/process/win32.py index 0ca43d5e9b..83f99a5d46 100644 --- a/src/cherrypy/process/win32.py +++ b/src/cherrypy/process/win32.py @@ -1,7 +1,6 @@ """Windows service. Requires pywin32.""" import os -import thread import win32api import win32con import win32event @@ -84,7 +83,7 @@ class Win32Bus(wspbus.Bus): return self.events[state] except KeyError: event = win32event.CreateEvent(None, 0, 0, - u"WSPBus %s Event (pid=%r)" % + "WSPBus %s Event (pid=%r)" % (state.name, os.getpid())) self.events[state] = event return event @@ -97,7 +96,7 @@ class Win32Bus(wspbus.Bus): win32event.PulseEvent(event) state = property(_get_state, _set_state) - def wait(self, state, interval=0.1): + def wait(self, state, interval=0.1, channel=None): """Wait for the given state(s), KeyboardInterrupt or SystemExit. Since this class uses native win32event objects, the interval @@ -128,7 +127,7 @@ class _ControlCodes(dict): def key_for(self, obj): """For the given value, return its corresponding key.""" - for key, val in self.iteritems(): + for key, val in self.items(): if val is obj: return key raise ValueError("The given object could not be found: %r" % obj) diff --git a/src/cherrypy/process/wspbus.py b/src/cherrypy/process/wspbus.py index 5bbcb8c629..6ef768dcbb 100644 --- a/src/cherrypy/process/wspbus.py +++ b/src/cherrypy/process/wspbus.py @@ -20,24 +20,24 @@ autoreload component. 
Ideally, a Bus object will be flexible enough to be useful in a variety of invocation scenarios: - 1. The deployer starts a site from the command line via a framework- - neutral deployment script; applications from multiple frameworks - are mixed in a single site. Command-line arguments and configuration - files are used to define site-wide components such as the HTTP server, - WSGI component graph, autoreload behavior, signal handling, etc. + 1. The deployer starts a site from the command line via a + framework-neutral deployment script; applications from multiple frameworks + are mixed in a single site. Command-line arguments and configuration + files are used to define site-wide components such as the HTTP server, + WSGI component graph, autoreload behavior, signal handling, etc. 2. The deployer starts a site via some other process, such as Apache; - applications from multiple frameworks are mixed in a single site. - Autoreload and signal handling (from Python at least) are disabled. + applications from multiple frameworks are mixed in a single site. + Autoreload and signal handling (from Python at least) are disabled. 3. The deployer starts a site via a framework-specific mechanism; - for example, when running tests, exploring tutorials, or deploying - single applications from a single framework. The framework controls - which site-wide components are enabled as it sees fit. + for example, when running tests, exploring tutorials, or deploying + single applications from a single framework. The framework controls + which site-wide components are enabled as it sees fit. The Bus object in this package uses topic-based publish-subscribe messaging to accomplish all this. A few topic channels are built in -('start', 'stop', 'exit', and 'graceful'). Frameworks and site containers -are free to define their own. If a message is sent to a channel that has -not been defined or has no listeners, there is no effect. +('start', 'stop', 'exit', 'graceful', 'log', and 'main'). Frameworks and +site containers are free to define their own. If a message is sent to a +channel that has not been defined or has no listeners, there is no effect. In general, there should only ever be a single Bus object per process. Frameworks and site containers share a single Bus object by publishing @@ -46,7 +46,7 @@ messages and subscribing listeners. The Bus object works as a finite state machine which models the current state of the process. Bus methods move it from one state to another; those methods then publish to subscribed listeners on the channel for -the new state. +the new state.:: O | @@ -62,16 +62,49 @@ the new state. import atexit import os -try: - set -except NameError: - from sets import Set as set import sys import threading import time import traceback as _traceback import warnings +from cherrypy._cpcompat import set + +# Here I save the value of os.getcwd(), which, if I am imported early enough, +# will be the directory from which the startup script was run. This is needed +# by _do_execv(), to change back to the original directory before execv()ing a +# new process. This is a defense against the application having changed the +# current working directory (which could make sys.executable "not found" if +# sys.executable is a relative-path, and/or cause other problems). 
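# A hedged illustration of the failure mode described above (hypothetical
# paths): if the site was started as `./python site.py`, so that
# sys.executable is the relative './python', and a handler later calls
# os.chdir('/tmp'), then a bare os.execv(sys.executable, args) would fail
# with ENOENT. Changing back to the saved directory first keeps the
# relative path resolvable.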
+_startup_cwd = os.getcwd() + +class ChannelFailures(Exception): + """Exception raised when errors occur in a listener during Bus.publish().""" + delimiter = '\n' + + def __init__(self, *args, **kwargs): + # Don't use 'super' here; Exceptions are old-style in Py2.4 + # See http://www.cherrypy.org/ticket/959 + Exception.__init__(self, *args, **kwargs) + self._exceptions = list() + + def handle_exception(self): + """Append the current exception to self.""" + self._exceptions.append(sys.exc_info()[1]) + + def get_instances(self): + """Return a list of seen exception instances.""" + return self._exceptions[:] + + def __str__(self): + exception_strings = map(repr, self.get_instances()) + return self.delimiter.join(exception_strings) + + __repr__ = __str__ + + def __bool__(self): + return bool(self._exceptions) + __nonzero__ = __bool__ # Use a flag to indicate the state of the bus. class _StateEnum(object): @@ -92,6 +125,17 @@ states.STOPPING = states.State() states.EXITING = states.State() +try: + import fcntl +except ImportError: + max_files = 0 +else: + try: + max_files = os.sysconf('SC_OPEN_MAX') + except AttributeError: + max_files = 1024 + + class Bus(object): """Process state-machine and messenger for HTTP site deployment. @@ -105,13 +149,14 @@ class Bus(object): states = states state = states.STOPPED execv = False + max_cloexec_files = max_files def __init__(self): self.execv = False self.state = states.STOPPED self.listeners = dict( [(channel, set()) for channel - in ('start', 'stop', 'exit', 'graceful', 'log')]) + in ('start', 'stop', 'exit', 'graceful', 'log', 'main')]) self._priorities = {} def subscribe(self, channel, callback, priority=None): @@ -136,24 +181,30 @@ class Bus(object): if channel not in self.listeners: return [] - exc = None + exc = ChannelFailures() output = [] items = [(self._priorities[(channel, listener)], listener) for listener in self.listeners[channel]] - items.sort() + try: + items.sort(key=lambda item: item[0]) + except TypeError: + # Python 2.3 had no 'key' arg, but that doesn't matter + # since it could sort dissimilar types just fine. + items.sort() for priority, listener in items: try: output.append(listener(*args, **kwargs)) except KeyboardInterrupt: raise - except SystemExit, e: + except SystemExit: + e = sys.exc_info()[1] # If we have previous errors ensure the exit code is non-zero if exc and e.code == 0: e.code = 1 raise except: - exc = sys.exc_info()[1] + exc.handle_exception() if channel == 'log': # Assume any further messages to 'log' will fail. pass @@ -161,7 +212,7 @@ class Bus(object): self.log("Error in %r listener %r" % (channel, listener), level=40, traceback=True) if exc: - raise + raise exc return output def _clean_exit(self): @@ -189,16 +240,18 @@ class Bus(object): except: self.log("Shutting down due to error in start listener:", level=40, traceback=True) - e_info = sys.exc_info() + e_info = sys.exc_info()[1] try: self.exit() except: # Any stop/exit errors will be logged inside publish(). pass - raise e_info[0], e_info[1], e_info[2] + # Re-raise the original error + raise e_info def exit(self): """Stop all services and prepare to exit the process.""" + exitstate = self.state try: self.stop() @@ -214,6 +267,13 @@ class Bus(object): # can't just let exceptions propagate out unhandled. # Assume it's been logged and just die. os._exit(70) # EX_SOFTWARE + + if exitstate == states.STARTING: + # exit() was called before start() finished, possibly due to + # Ctrl-C because a start listener got stuck. 
In this case, + # we could get stuck in a loop where Ctrl-C never exits the + # process, so we just call os.exit here. + os._exit(70) # EX_SOFTWARE def restart(self): """Restart the process (may close connections). @@ -239,7 +299,7 @@ class Bus(object): thread perform the actual execv call (required on some platforms). """ try: - self.wait(states.EXITING, interval=interval) + self.wait(states.EXITING, interval=interval, channel='main') except (KeyboardInterrupt, IOError): # The time.sleep call might raise # "IOError: [Errno 4] Interrupted function call" on KBInt. @@ -265,13 +325,14 @@ class Bus(object): else: d = t.isDaemon() if not d: + self.log("Waiting for thread %s." % t.getName()) t.join() if self.execv: self._do_execv() - def wait(self, state, interval=0.1): - """Wait for the given state(s).""" + def wait(self, state, interval=0.1, channel=None): + """Poll for the given state(s) at intervals; publish to channel.""" if isinstance(state, (tuple, list)): states = state else: @@ -280,6 +341,7 @@ class Bus(object): def _wait(): while self.state not in states: time.sleep(interval) + self.publish(channel) # From http://psyco.sourceforge.net/psycoguide/bugs.html: # "The compiled machine code does not include the regular polling @@ -302,11 +364,37 @@ class Bus(object): """ args = sys.argv[:] self.log('Re-spawning %s' % ' '.join(args)) - args.insert(0, sys.executable) - if sys.platform == 'win32': - args = ['"%s"' % arg for arg in args] - os.execv(sys.executable, args) + if sys.platform[:4] == 'java': + from _systemrestart import SystemRestart + raise SystemRestart + else: + args.insert(0, sys.executable) + if sys.platform == 'win32': + args = ['"%s"' % arg for arg in args] + + os.chdir(_startup_cwd) + if self.max_cloexec_files: + self._set_cloexec() + os.execv(sys.executable, args) + + def _set_cloexec(self): + """Set the CLOEXEC flag on all open files (except stdin/out/err). + + If self.max_cloexec_files is an integer (the default), then on + platforms which support it, it represents the max open files setting + for the operating system. This function will be called just before + the process is restarted via os.execv() to prevent open files + from persisting into the new process. + + Set self.max_cloexec_files to 0 to disable this behavior. + """ + for fd in range(3, self.max_cloexec_files): # skip stdin/out/err + try: + flags = fcntl.fcntl(fd, fcntl.F_GETFD) + except IOError: + continue + fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) def stop(self): """Stop all services.""" @@ -338,8 +426,7 @@ class Bus(object): def log(self, msg="", level=20, traceback=False): """Log the given message. Append the last traceback if requested.""" if traceback: - exc = sys.exc_info() - msg += "\n" + "".join(_traceback.format_exception(*exc)) + msg += "\n" + "".join(_traceback.format_exception(*sys.exc_info())) self.publish('log', msg, level) bus = Bus() diff --git a/src/cherrypy/scaffold/__init__.py b/src/cherrypy/scaffold/__init__.py index f50cc213d3..00964ac5f6 100644 --- a/src/cherrypy/scaffold/__init__.py +++ b/src/cherrypy/scaffold/__init__.py @@ -8,7 +8,7 @@ then tweak as desired. Even before any tweaking, this should serve a few demonstration pages. 
Change to this directory and run: - python cherrypy\cherryd -c cherrypy\scaffold\site.conf + ../cherryd -c site.conf """ diff --git a/src/cherrypy/scaffold/apache-fcgi.conf b/src/cherrypy/scaffold/apache-fcgi.conf new file mode 100644 index 0000000000..922398eaf8 --- /dev/null +++ b/src/cherrypy/scaffold/apache-fcgi.conf @@ -0,0 +1,22 @@ +# Apache2 server conf file for using CherryPy with mod_fcgid. + +# This doesn't have to be "C:/", but it has to be a directory somewhere, and +# MUST match the directory used in the FastCgiExternalServer directive, below. +DocumentRoot "C:/" + +ServerName 127.0.0.1 +Listen 80 +LoadModule fastcgi_module modules/mod_fastcgi.dll +LoadModule rewrite_module modules/mod_rewrite.so + +Options ExecCGI +SetHandler fastcgi-script +RewriteEngine On +# Send requests for any URI to our fastcgi handler. +RewriteRule ^(.*)$ /fastcgi.pyc [L] + +# The FastCgiExternalServer directive defines filename as an external FastCGI application. +# If filename does not begin with a slash (/) then it is assumed to be relative to the ServerRoot. +# The filename does not have to exist in the local filesystem. URIs that Apache resolves to this +# filename will be handled by this external FastCGI application. +FastCgiExternalServer "C:/fastcgi.pyc" -host 127.0.0.1:8088 \ No newline at end of file diff --git a/src/cherrypy/scaffold/site.conf b/src/cherrypy/scaffold/site.conf index 6fc8f4ec34..6ed3898373 100644 --- a/src/cherrypy/scaffold/site.conf +++ b/src/cherrypy/scaffold/site.conf @@ -5,4 +5,10 @@ server.socket_host: "0.0.0.0" server.socket_port: 8088 -tree.myapp: cherrypy.Application(scaffold.root, "/", "cherrypy/scaffold/example.conf") +# Uncomment the following lines to run on HTTPS at the same time +#server.2.socket_host: "0.0.0.0" +#server.2.socket_port: 8433 +#server.2.ssl_certificate: '../test/test.pem' +#server.2.ssl_private_key: '../test/test.pem' + +tree.myapp: cherrypy.Application(scaffold.root, "/", "example.conf") diff --git a/src/cherrypy/wsgiserver/__init__.py b/src/cherrypy/wsgiserver/__init__.py index c380e18b05..ee6190fee1 100644 --- a/src/cherrypy/wsgiserver/__init__.py +++ b/src/cherrypy/wsgiserver/__init__.py @@ -1,1794 +1,14 @@ -"""A high-speed, production ready, thread pooled, generic WSGI server. - -Simplest example on how to use this module directly -(without using CherryPy's application machinery): - - from cherrypy import wsgiserver - - def my_crazy_app(environ, start_response): - status = '200 OK' - response_headers = [('Content-type','text/plain')] - start_response(status, response_headers) - return ['Hello world!\n'] - - server = wsgiserver.CherryPyWSGIServer( - ('0.0.0.0', 8070), my_crazy_app, - server_name='www.cherrypy.example') - -The CherryPy WSGI server can serve as many WSGI applications -as you want in one instance by using a WSGIPathInfoDispatcher: - - d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) - server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) - -Want SSL support? Just set these attributes: - - server.ssl_certificate = - server.ssl_private_key = - - if __name__ == '__main__': - try: - server.start() - except KeyboardInterrupt: - server.stop() - -This won't call the CherryPy engine (application side) at all, only the -WSGI server, which is independant from the rest of CherryPy. Don't -let the name "CherryPyWSGIServer" throw you; the name merely reflects -its origin, not its coupling. - -For those of you wanting to understand internals of this module, here's the -basic call flow. 
The server's listening thread runs a very tight loop, -sticking incoming connections onto a Queue: - - server = CherryPyWSGIServer(...) - server.start() - while True: - tick() - # This blocks until a request comes in: - child = socket.accept() - conn = HTTPConnection(child, ...) - server.requests.put(conn) - -Worker threads are kept in a pool and poll the Queue, popping off and then -handling each connection in turn. Each connection can consist of an arbitrary -number of requests and their responses, so we run a nested loop: - - while True: - conn = server.requests.get() - conn.communicate() - -> while True: - req = HTTPRequest(...) - req.parse_request() - -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" - req.rfile.readline() - req.read_headers() - req.respond() - -> response = wsgi_app(...) - try: - for chunk in response: - if chunk: - req.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - if req.close_connection: - return -""" - - -import base64 -import os -import Queue -import re -quoted_slash = re.compile("(?i)%2F") -import rfc822 -import socket -try: - import cStringIO as StringIO -except ImportError: - import StringIO - -_fileobject_uses_str_type = isinstance(socket._fileobject(None)._rbuf, basestring) +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] import sys -import threading -import time -import traceback -from urllib import unquote -from urlparse import urlparse -import warnings - -try: - from OpenSSL import SSL - from OpenSSL import crypto -except ImportError: - SSL = None - -import errno - -def plat_specific_errors(*errnames): - """Return error numbers for all errors in errnames on this platform. - - The 'errno' module contains different global constants depending on - the specific platform (OS). This function will return the list of - numeric values for a given list of potential names. - """ - errno_names = dir(errno) - nums = [getattr(errno, k) for k in errnames if k in errno_names] - # de-dupe the list - return dict.fromkeys(nums).keys() - -socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") - -socket_errors_to_ignore = plat_specific_errors( - "EPIPE", - "EBADF", "WSAEBADF", - "ENOTSOCK", "WSAENOTSOCK", - "ETIMEDOUT", "WSAETIMEDOUT", - "ECONNREFUSED", "WSAECONNREFUSED", - "ECONNRESET", "WSAECONNRESET", - "ECONNABORTED", "WSAECONNABORTED", - "ENETRESET", "WSAENETRESET", - "EHOSTDOWN", "EHOSTUNREACH", - ) -socket_errors_to_ignore.append("timed out") - -socket_errors_nonblocking = plat_specific_errors( - 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') - -comma_separated_headers = ['ACCEPT', 'ACCEPT-CHARSET', 'ACCEPT-ENCODING', - 'ACCEPT-LANGUAGE', 'ACCEPT-RANGES', 'ALLOW', 'CACHE-CONTROL', - 'CONNECTION', 'CONTENT-ENCODING', 'CONTENT-LANGUAGE', 'EXPECT', - 'IF-MATCH', 'IF-NONE-MATCH', 'PRAGMA', 'PROXY-AUTHENTICATE', 'TE', - 'TRAILER', 'TRANSFER-ENCODING', 'UPGRADE', 'VARY', 'VIA', 'WARNING', - 'WWW-AUTHENTICATE'] - - -class WSGIPathInfoDispatcher(object): - """A WSGI dispatcher for dispatch based on the PATH_INFO. - - apps: a dict or list of (path_prefix, app) pairs. 
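    For example, a sketch with two hypothetical WSGI apps::

        def root(environ, start_response):
            start_response('200 OK', [('Content-Type', 'text/plain')])
            return ['root\n']

        def blog(environ, start_response):
            start_response('200 OK', [('Content-Type', 'text/plain')])
            return ['blog at %s\n' % environ['PATH_INFO']]

        d = WSGIPathInfoDispatcher({'/': root, '/blog': blog})
        # '/blog/2011/12' -> blog; SCRIPT_NAME gains '/blog',
        #                    PATH_INFO becomes '/2011/12'
        # '/anything'     -> root ('/' is stored as the prefix "")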
- """ - - def __init__(self, apps): - try: - apps = apps.items() - except AttributeError: - pass - - # Sort the apps by len(path), descending - apps.sort() - apps.reverse() - - # The path_prefix strings must start, but not end, with a slash. - # Use "" instead of "/". - self.apps = [(p.rstrip("/"), a) for p, a in apps] - - def __call__(self, environ, start_response): - path = environ["PATH_INFO"] or "/" - for p, app in self.apps: - # The apps list should be sorted by length, descending. - if path.startswith(p + "/") or path == p: - environ = environ.copy() - environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p - environ["PATH_INFO"] = path[len(p):] - return app(environ, start_response) - - start_response('404 Not Found', [('Content-Type', 'text/plain'), - ('Content-Length', '0')]) - return [''] - - -class MaxSizeExceeded(Exception): - pass - -class SizeCheckWrapper(object): - """Wraps a file-like object, raising MaxSizeExceeded if too large.""" - - def __init__(self, rfile, maxlen): - self.rfile = rfile - self.maxlen = maxlen - self.bytes_read = 0 - - def _check_length(self): - if self.maxlen and self.bytes_read > self.maxlen: - raise MaxSizeExceeded() - - def read(self, size=None): - data = self.rfile.read(size) - self.bytes_read += len(data) - self._check_length() - return data - - def readline(self, size=None): - if size is not None: - data = self.rfile.readline(size) - self.bytes_read += len(data) - self._check_length() - return data - - # User didn't specify a size ... - # We read the line in chunks to make sure it's not a 100MB line ! - res = [] - while True: - data = self.rfile.readline(256) - self.bytes_read += len(data) - self._check_length() - res.append(data) - # See http://www.cherrypy.org/ticket/421 - if len(data) < 256 or data[-1:] == "\n": - return ''.join(res) - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def next(self): - data = self.rfile.next() - self.bytes_read += len(data) - self._check_length() - return data - - -class HTTPRequest(object): - """An HTTP Request (and response). - - A single HTTP connection may consist of multiple request/response pairs. - - send: the 'send' method from the connection's socket object. - wsgi_app: the WSGI application to call. - environ: a partial WSGI environ (server and connection entries). - The caller MUST set the following entries: - * All wsgi.* entries, including .input - * SERVER_NAME and SERVER_PORT - * Any SSL_* entries - * Any custom entries like REMOTE_ADDR and REMOTE_PORT - * SERVER_SOFTWARE: the value to write in the "Server" response header. - * ACTUAL_SERVER_PROTOCOL: the value to write in the Status-Line of - the response. From RFC 2145: "An HTTP server SHOULD send a - response version equal to the highest version for which the - server is at least conditionally compliant, and whose major - version is less than or equal to the one received in the - request. An HTTP server MUST NOT send a version for which - it is not at least conditionally compliant." - - outheaders: a list of header tuples to write in the response. - ready: when True, the request has been parsed and is ready to begin - generating the response. When False, signals the calling Connection - that the response should not be generated and the connection should - close. 
- close_connection: signals the calling Connection that the request - should close. This does not imply an error! The client and/or - server may each request that the connection be closed. - chunked_write: if True, output will be encoded with the "chunked" - transfer-coding. This value is set automatically inside - send_headers. - """ - - max_request_header_size = 0 - max_request_body_size = 0 - - def __init__(self, wfile, environ, wsgi_app): - self.rfile = environ['wsgi.input'] - self.wfile = wfile - self.environ = environ.copy() - self.wsgi_app = wsgi_app - - self.ready = False - self.started_response = False - self.status = "" - self.outheaders = [] - self.sent_headers = False - self.close_connection = False - self.chunked_write = False - - def parse_request(self): - """Parse the next HTTP request start-line and message-headers.""" - self.rfile.maxlen = self.max_request_header_size - self.rfile.bytes_read = 0 - - try: - self._parse_request() - except MaxSizeExceeded: - self.simple_response("413 Request Entity Too Large") - return - - def _parse_request(self): - # HTTP/1.1 connections are persistent by default. If a client - # requests a page, then idles (leaves the connection open), - # then rfile.readline() will raise socket.error("timed out"). - # Note that it does this based on the value given to settimeout(), - # and doesn't need the client to request or acknowledge the close - # (although your TCP stack might suffer for it: cf Apache's history - # with FIN_WAIT_2). - request_line = self.rfile.readline() - if not request_line: - # Force self.ready = False so the connection will close. - self.ready = False - return - - if request_line == "\r\n": - # RFC 2616 sec 4.1: "...if the server is reading the protocol - # stream at the beginning of a message and receives a CRLF - # first, it should ignore the CRLF." - # But only ignore one leading line! else we enable a DoS. - request_line = self.rfile.readline() - if not request_line: - self.ready = False - return - - environ = self.environ - - try: - method, path, req_protocol = request_line.strip().split(" ", 2) - except ValueError: - self.simple_response(400, "Malformed Request-Line") - return - - environ["REQUEST_METHOD"] = method - - # path may be an abs_path (including "http://host.domain.tld"); - scheme, location, path, params, qs, frag = urlparse(path) - - if frag: - self.simple_response("400 Bad Request", - "Illegal #fragment in Request-URI.") - return - - if scheme: - environ["wsgi.url_scheme"] = scheme - if params: - path = path + ";" + params - - environ["SCRIPT_NAME"] = "" - - # Unquote the path+params (e.g. "/this%20path" -> "this path"). - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 - # - # But note that "...a URI must be separated into its components - # before the escaped characters within those components can be - # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 - atoms = [unquote(x) for x in quoted_slash.split(path)] - path = "%2F".join(atoms) - environ["PATH_INFO"] = path - - # Note that, like wsgiref and most other WSGI servers, - # we unquote the path but not the query string. - environ["QUERY_STRING"] = qs - - # Compare request and server HTTP protocol versions, in case our - # server does not support the requested protocol. Limit our output - # to min(req, server). 
We want the following output: - # request server actual written supported response - # protocol protocol response protocol feature set - # a 1.0 1.0 1.0 1.0 - # b 1.0 1.1 1.1 1.0 - # c 1.1 1.0 1.0 1.0 - # d 1.1 1.1 1.1 1.1 - # Notice that, in (b), the response will be "HTTP/1.1" even though - # the client only understands 1.0. RFC 2616 10.5.6 says we should - # only return 505 if the _major_ version is different. - rp = int(req_protocol[5]), int(req_protocol[7]) - server_protocol = environ["ACTUAL_SERVER_PROTOCOL"] - sp = int(server_protocol[5]), int(server_protocol[7]) - if sp[0] != rp[0]: - self.simple_response("505 HTTP Version Not Supported") - return - # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. - environ["SERVER_PROTOCOL"] = req_protocol - self.response_protocol = "HTTP/%s.%s" % min(rp, sp) - - # If the Request-URI was an absoluteURI, use its location atom. - if location: - environ["SERVER_NAME"] = location - - # then all the http headers - try: - self.read_headers() - except ValueError, ex: - self.simple_response("400 Bad Request", repr(ex.args)) - return - - mrbs = self.max_request_body_size - if mrbs and int(environ.get("CONTENT_LENGTH", 0)) > mrbs: - self.simple_response("413 Request Entity Too Large") - return - - # Persistent connection support - if self.response_protocol == "HTTP/1.1": - # Both server and client are HTTP/1.1 - if environ.get("HTTP_CONNECTION", "") == "close": - self.close_connection = True - else: - # Either the server or client (or both) are HTTP/1.0 - if environ.get("HTTP_CONNECTION", "") != "Keep-Alive": - self.close_connection = True - - # Transfer-Encoding support - te = None - if self.response_protocol == "HTTP/1.1": - te = environ.get("HTTP_TRANSFER_ENCODING") - if te: - te = [x.strip().lower() for x in te.split(",") if x.strip()] - - self.chunked_read = False - - if te: - for enc in te: - if enc == "chunked": - self.chunked_read = True - else: - # Note that, even if we see "chunked", we must reject - # if there is an extension we don't recognize. - self.simple_response("501 Unimplemented") - self.close_connection = True - return - - # From PEP 333: - # "Servers and gateways that implement HTTP 1.1 must provide - # transparent support for HTTP 1.1's "expect/continue" mechanism. - # This may be done in any of several ways: - # 1. Respond to requests containing an Expect: 100-continue request - # with an immediate "100 Continue" response, and proceed normally. - # 2. Proceed with the request normally, but provide the application - # with a wsgi.input stream that will send the "100 Continue" - # response if/when the application first attempts to read from - # the input stream. The read request must then remain blocked - # until the client responds. - # 3. Wait until the client decides that the server does not support - # expect/continue, and sends the request body on its own. - # (This is suboptimal, and is not recommended.) - # - # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, - # but it seems like it would be a big slowdown for such a rare case. - if environ.get("HTTP_EXPECT", "") == "100-continue": - self.simple_response(100) - - self.ready = True - - def read_headers(self): - """Read header lines from the incoming stream.""" - environ = self.environ - - while True: - line = self.rfile.readline() - if not line: - # No more data--illegal end of headers - raise ValueError("Illegal end of headers.") - - if line == '\r\n': - # Normal end of headers - break - - if line[0] in ' \t': - # It's a continuation line. 
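            # A hedged illustration with hypothetical headers: the pair
            #     Accept: text/html
            #     Accept: text/plain
            # merges into environ['HTTP_ACCEPT'] = 'text/html, text/plain'
            # (ACCEPT is listed in comma_separated_headers), while a line
            # beginning with a space or tab extends the value parsed just
            # before it.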
- v = line.strip() - else: - k, v = line.split(":", 1) - k, v = k.strip().upper(), v.strip() - envname = "HTTP_" + k.replace("-", "_") - - if k in comma_separated_headers: - existing = environ.get(envname) - if existing: - v = ", ".join((existing, v)) - environ[envname] = v - - ct = environ.pop("HTTP_CONTENT_TYPE", None) - if ct is not None: - environ["CONTENT_TYPE"] = ct - cl = environ.pop("HTTP_CONTENT_LENGTH", None) - if cl is not None: - environ["CONTENT_LENGTH"] = cl - - def decode_chunked(self): - """Decode the 'chunked' transfer coding.""" - cl = 0 - data = StringIO.StringIO() - while True: - line = self.rfile.readline().strip().split(";", 1) - chunk_size = int(line.pop(0), 16) - if chunk_size <= 0: - break -## if line: chunk_extension = line[0] - cl += chunk_size - data.write(self.rfile.read(chunk_size)) - crlf = self.rfile.read(2) - if crlf != "\r\n": - self.simple_response("400 Bad Request", - "Bad chunked transfer coding " - "(expected '\\r\\n', got %r)" % crlf) - return - - # Grab any trailer headers - self.read_headers() - - data.seek(0) - self.environ["wsgi.input"] = data - self.environ["CONTENT_LENGTH"] = str(cl) or "" - return True - - def respond(self): - """Call the appropriate WSGI app and write its iterable output.""" - # Set rfile.maxlen to ensure we don't read past Content-Length. - # This will also be used to read the entire request body if errors - # are raised before the app can read the body. - if self.chunked_read: - # If chunked, Content-Length will be 0. - self.rfile.maxlen = self.max_request_body_size - else: - cl = int(self.environ.get("CONTENT_LENGTH", 0)) - if self.max_request_body_size: - self.rfile.maxlen = min(cl, self.max_request_body_size) - else: - self.rfile.maxlen = cl - self.rfile.bytes_read = 0 - - try: - self._respond() - except MaxSizeExceeded: - if not self.sent_headers: - self.simple_response("413 Request Entity Too Large") - return - - def _respond(self): - if self.chunked_read: - if not self.decode_chunked(): - self.close_connection = True - return - - response = self.wsgi_app(self.environ, self.start_response) - try: - for chunk in response: - # "The start_response callable must not actually transmit - # the response headers. Instead, it must store them for the - # server or gateway to transmit only after the first - # iteration of the application return value that yields - # a NON-EMPTY string, or upon the application's first - # invocation of the write() callable." 
(PEP 333) - if chunk: - self.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - - if (self.ready and not self.sent_headers): - self.sent_headers = True - self.send_headers() - if self.chunked_write: - self.wfile.sendall("0\r\n\r\n") - - def simple_response(self, status, msg=""): - """Write a simple response back to the client.""" - status = str(status) - buf = ["%s %s\r\n" % (self.environ['ACTUAL_SERVER_PROTOCOL'], status), - "Content-Length: %s\r\n" % len(msg), - "Content-Type: text/plain\r\n"] - - if status[:3] == "413" and self.response_protocol == 'HTTP/1.1': - # Request Entity Too Large - self.close_connection = True - buf.append("Connection: close\r\n") - - buf.append("\r\n") - if msg: - buf.append(msg) - - try: - self.wfile.sendall("".join(buf)) - except socket.error, x: - if x.args[0] not in socket_errors_to_ignore: - raise - - def start_response(self, status, headers, exc_info = None): - """WSGI callable to begin the HTTP response.""" - # "The application may call start_response more than once, - # if and only if the exc_info argument is provided." - if self.started_response and not exc_info: - raise AssertionError("WSGI start_response called a second " - "time with no exc_info.") - - # "if exc_info is provided, and the HTTP headers have already been - # sent, start_response must raise an error, and should raise the - # exc_info tuple." - if self.sent_headers: - try: - raise exc_info[0], exc_info[1], exc_info[2] - finally: - exc_info = None - - self.started_response = True - self.status = status - self.outheaders.extend(headers) - return self.write - - def write(self, chunk): - """WSGI callable to write unbuffered data to the client. - - This method is also used internally by start_response (to write - data from the iterable returned by the WSGI application). - """ - if not self.started_response: - raise AssertionError("WSGI write called before start_response.") - - if not self.sent_headers: - self.sent_headers = True - self.send_headers() - - if self.chunked_write and chunk: - buf = [hex(len(chunk))[2:], "\r\n", chunk, "\r\n"] - self.wfile.sendall("".join(buf)) - else: - self.wfile.sendall(chunk) - - def send_headers(self): - """Assert, process, and send the HTTP response message-headers.""" - hkeys = [key.lower() for key, value in self.outheaders] - status = int(self.status[:3]) - - if status == 413: - # Request Entity Too Large. Close conn to avoid garbage. - self.close_connection = True - elif "content-length" not in hkeys: - # "All 1xx (informational), 204 (no content), - # and 304 (not modified) responses MUST NOT - # include a message-body." So no point chunking. - if status < 200 or status in (204, 205, 304): - pass - else: - if (self.response_protocol == 'HTTP/1.1' - and self.environ["REQUEST_METHOD"] != 'HEAD'): - # Use the chunked transfer-coding - self.chunked_write = True - self.outheaders.append(("Transfer-Encoding", "chunked")) - else: - # Closing the conn is the only way to determine len. - self.close_connection = True - - if "connection" not in hkeys: - if self.response_protocol == 'HTTP/1.1': - # Both server and client are HTTP/1.1 or better - if self.close_connection: - self.outheaders.append(("Connection", "close")) - else: - # Server and/or client are HTTP/1.0 - if not self.close_connection: - self.outheaders.append(("Connection", "Keep-Alive")) - - if (not self.close_connection) and (not self.chunked_read): - # Read any remaining request body data on the socket. 
- # "If an origin server receives a request that does not include an - # Expect request-header field with the "100-continue" expectation, - # the request includes a request body, and the server responds - # with a final status code before reading the entire request body - # from the transport connection, then the server SHOULD NOT close - # the transport connection until it has read the entire request, - # or until the client closes the connection. Otherwise, the client - # might not reliably receive the response message. However, this - # requirement is not be construed as preventing a server from - # defending itself against denial-of-service attacks, or from - # badly broken client implementations." - size = self.rfile.maxlen - self.rfile.bytes_read - if size > 0: - self.rfile.read(size) - - if "date" not in hkeys: - self.outheaders.append(("Date", rfc822.formatdate())) - - if "server" not in hkeys: - self.outheaders.append(("Server", self.environ['SERVER_SOFTWARE'])) - - buf = [self.environ['ACTUAL_SERVER_PROTOCOL'], " ", self.status, "\r\n"] - try: - buf += [k + ": " + v + "\r\n" for k, v in self.outheaders] - except TypeError: - if not isinstance(k, str): - raise TypeError("WSGI response header key %r is not a string.") - if not isinstance(v, str): - raise TypeError("WSGI response header value %r is not a string.") - else: - raise - buf.append("\r\n") - self.wfile.sendall("".join(buf)) - - -class NoSSLError(Exception): - """Exception raised when a client speaks HTTP to an HTTPS socket.""" - pass - - -class FatalSSLAlert(Exception): - """Exception raised when the SSL implementation signals a fatal alert.""" - pass - - -if not _fileobject_uses_str_type: - class CP_fileobject(socket._fileobject): - """Faux file object attached to a socket object.""" - - def sendall(self, data): - """Sendall for non-blocking sockets.""" - while data: - try: - bytes_sent = self.send(data) - data = data[bytes_sent:] - except socket.error, e: - if e.args[0] not in socket_errors_nonblocking: - raise - - def send(self, data): - return self._sock.send(data) - - def flush(self): - if self._wbuf: - buffer = "".join(self._wbuf) - self._wbuf = [] - self.sendall(buffer) - - def recv(self, size): - while True: - try: - return self._sock.recv(size) - except socket.error, e: - if (e.args[0] not in socket_errors_nonblocking - and e.args[0] not in socket_error_eintr): - raise - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. - # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(rbufsize) - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. 
- while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - data = self.recv(left) - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self.recv - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(self._rbufsize) - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. - while True: - data = self.recv(self._rbufsize) - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). 
- return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - +if sys.version_info < (3, 0): + from wsgiserver2 import * else: - class CP_fileobject(socket._fileobject): - """Faux file object attached to a socket object.""" - - def sendall(self, data): - """Sendall for non-blocking sockets.""" - while data: - try: - bytes_sent = self.send(data) - data = data[bytes_sent:] - except socket.error, e: - if e.args[0] not in socket_errors_nonblocking: - raise - - def send(self, data): - return self._sock.send(data) - - def flush(self): - if self._wbuf: - buffer = "".join(self._wbuf) - self._wbuf = [] - self.sendall(buffer) - - def recv(self, size): - while True: - try: - return self._sock.recv(size) - except socket.error, e: - if (e.args[0] not in socket_errors_nonblocking - and e.args[0] not in socket_error_eintr): - raise - - def read(self, size=-1): - if size < 0: - # Read until EOF - buffers = [self._rbuf] - self._rbuf = "" - if self._rbufsize <= 1: - recv_size = self.default_bufsize - else: - recv_size = self._rbufsize - - while True: - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - return "".join(buffers) - else: - # Read until size bytes or EOF seen, whichever comes first - data = self._rbuf - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - left = size - buf_len - recv_size = max(self._rbufsize, left) - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - def readline(self, size=-1): - data = self._rbuf - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - assert data == "" - buffers = [] - while data != "\n": - data = self.recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - return "".join(buffers) - else: - # Read until size bytes or \n or EOF seen, whichever comes first - nl = data.find('\n', 0, size) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - left = size - buf_len - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - -class SSL_fileobject(CP_fileobject): - """SSL file object attached to a socket object.""" - - ssl_timeout = 3 - ssl_retry = .01 - - def _safe_call(self, is_reader, call, *args, **kwargs): - """Wrap the given call with SSL error-trapping. - - is_reader: if False EOF errors will be raised. 
If True, EOF errors - will return "" (to emulate normal sockets). - """ - start = time.time() - while True: - try: - return call(*args, **kwargs) - except SSL.WantReadError: - # Sleep and try again. This is dangerous, because it means - # the rest of the stack has no way of differentiating - # between a "new handshake" error and "client dropped". - # Note this isn't an endless loop: there's a timeout below. - time.sleep(self.ssl_retry) - except SSL.WantWriteError: - time.sleep(self.ssl_retry) - except SSL.SysCallError, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - errnum = e.args[0] - if is_reader and errnum in socket_errors_to_ignore: - return "" - raise socket.error(errnum) - except SSL.Error, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - thirdarg = None - try: - thirdarg = e.args[0][0][2] - except IndexError: - pass - - if thirdarg == 'http request': - # The client is talking HTTP to an HTTPS server. - raise NoSSLError() - raise FatalSSLAlert(*e.args) - except: - raise - - if time.time() - start > self.ssl_timeout: - raise socket.timeout("timed out") - - def recv(self, *args, **kwargs): - buf = [] - r = super(SSL_fileobject, self).recv - while True: - data = self._safe_call(True, r, *args, **kwargs) - buf.append(data) - p = self._sock.pending() - if not p: - return "".join(buf) - - def sendall(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).sendall, *args, **kwargs) - - def send(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).send, *args, **kwargs) - - -class HTTPConnection(object): - """An HTTP connection (active socket). - - socket: the raw socket object (usually TCP) for this connection. - wsgi_app: the WSGI application for this server/connection. - environ: a WSGI environ template. This will be copied for each request. - - rfile: a fileobject for reading from the socket. - send: a function for writing (+ flush) to the socket. - """ - - rbufsize = -1 - RequestHandlerClass = HTTPRequest - environ = {"wsgi.version": (1, 0), - "wsgi.url_scheme": "http", - "wsgi.multithread": True, - "wsgi.multiprocess": False, - "wsgi.run_once": False, - "wsgi.errors": sys.stderr, - } - - def __init__(self, sock, wsgi_app, environ): - self.socket = sock - self.wsgi_app = wsgi_app - - # Copy the class environ into self. - self.environ = self.environ.copy() - self.environ.update(environ) - - if SSL and isinstance(sock, SSL.ConnectionType): - timeout = sock.gettimeout() - self.rfile = SSL_fileobject(sock, "rb", self.rbufsize) - self.rfile.ssl_timeout = timeout - self.wfile = SSL_fileobject(sock, "wb", -1) - self.wfile.ssl_timeout = timeout - else: - self.rfile = CP_fileobject(sock, "rb", self.rbufsize) - self.wfile = CP_fileobject(sock, "wb", -1) - - # Wrap wsgi.input but not HTTPConnection.rfile itself. - # We're also not setting maxlen yet; we'll do that separately - # for headers and body for each iteration of self.communicate - # (if maxlen is 0 the wrapper doesn't check length). - self.environ["wsgi.input"] = SizeCheckWrapper(self.rfile, 0) - - def communicate(self): - """Read each request and respond appropriately.""" - try: - while True: - # (re)set req to None so that if something goes wrong in - # the RequestHandlerClass constructor, the error doesn't - # get written to the previous request. - req = None - req = self.RequestHandlerClass(self.wfile, self.environ, - self.wsgi_app) - - # This order of operations should guarantee correct pipelining. 
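                # A brief illustration of the guarantee: with HTTP/1.1
                # keep-alive, a client may pipeline several requests on one
                # socket; each pass through this loop parses exactly one
                # request and finishes writing its response before the next
                # request is parsed, so responses return in request order.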
- req.parse_request() - if not req.ready: - return - - req.respond() - if req.close_connection: - return - - except socket.error, e: - errnum = e.args[0] - if errnum == 'timed out': - if req and not req.sent_headers: - req.simple_response("408 Request Timeout") - elif errnum not in socket_errors_to_ignore: - if req and not req.sent_headers: - req.simple_response("500 Internal Server Error", - format_exc()) - return - except (KeyboardInterrupt, SystemExit): - raise - except FatalSSLAlert, e: - # Close the connection. - return - except NoSSLError: - if req and not req.sent_headers: - # Unwrap our wfile - req.wfile = CP_fileobject(self.socket._sock, "wb", -1) - req.simple_response("400 Bad Request", - "The client sent a plain HTTP request, but " - "this server only speaks HTTPS on this port.") - self.linger = True - except Exception, e: - if req and not req.sent_headers: - req.simple_response("500 Internal Server Error", format_exc()) - - linger = False - - def close(self): - """Close the socket underlying this connection.""" - self.rfile.close() - - if not self.linger: - # Python's socket module does NOT call close on the kernel socket - # when you call socket.close(). We do so manually here because we - # want this server to send a FIN TCP segment immediately. Note this - # must be called *before* calling socket.close(), because the latter - # drops its reference to the kernel socket. - self.socket._sock.close() - self.socket.close() - else: - # On the other hand, sometimes we want to hang around for a bit - # to make sure the client has a chance to read our entire - # response. Skipping the close() calls here delays the FIN - # packet until the socket object is garbage-collected later. - # Someday, perhaps, we'll do the full lingering_close that - # Apache does, but not today. - pass - - -def format_exc(limit=None): - """Like print_exc() but return a string. Backport for Python 2.3.""" - try: - etype, value, tb = sys.exc_info() - return ''.join(traceback.format_exception(etype, value, tb, limit)) - finally: - etype = value = tb = None - - -_SHUTDOWNREQUEST = None - -class WorkerThread(threading.Thread): - """Thread which continuously polls a Queue for Connection objects. - - server: the HTTP Server which spawned this thread, and which owns the - Queue and is placing active connections into it. - ready: a simple flag for the calling server to know when this thread - has begun polling the Queue. - - Due to the timing issues of polling a Queue, a WorkerThread does not - check its own 'ready' flag after it has started. To stop the thread, - it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue - (one for each running WorkerThread). - """ - - conn = None - - def __init__(self, server): - self.ready = False - self.server = server - threading.Thread.__init__(self) - - def run(self): - try: - self.ready = True - while True: - conn = self.server.requests.get() - if conn is _SHUTDOWNREQUEST: - return - - self.conn = conn - try: - conn.communicate() - finally: - conn.close() - self.conn = None - except (KeyboardInterrupt, SystemExit), exc: - self.server.interrupt = exc - - -class ThreadPool(object): - """A Request Queue for the CherryPyWSGIServer which pools threads. - - ThreadPool objects must provide min, get(), put(obj), start() - and stop(timeout) attributes. 
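    A minimal sketch of the pattern this class implements, using only the
    standard library (illustrative; the real pool adds sizing, naming and
    shutdown timeouts)::

        import Queue
        import threading

        SHUTDOWN = None                     # sentinel; one per worker
        q = Queue.Queue()

        def worker():
            while True:
                item = q.get()
                if item is SHUTDOWN:
                    return                  # one sentinel stops one worker
                item()                      # "handle the connection"

        threads = [threading.Thread(target=worker) for i in range(10)]
        for t in threads:
            t.start()
        q.put(lambda: None)                 # enqueue some work
        for t in threads:
            q.put(SHUTDOWN)
        for t in threads:
            t.join()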
- """ - - def __init__(self, server, min=10, max=-1): - self.server = server - self.min = min - self.max = max - self._threads = [] - self._queue = Queue.Queue() - self.get = self._queue.get - - def start(self): - """Start the pool of threads.""" - for i in xrange(self.min): - self._threads.append(WorkerThread(self.server)) - for worker in self._threads: - worker.setName("CP WSGIServer " + worker.getName()) - worker.start() - for worker in self._threads: - while not worker.ready: - time.sleep(.1) - - def _get_idle(self): - """Number of worker threads which are idle. Read-only.""" - return len([t for t in self._threads if t.conn is None]) - idle = property(_get_idle, doc=_get_idle.__doc__) - - def put(self, obj): - self._queue.put(obj) - if obj is _SHUTDOWNREQUEST: - return - - def grow(self, amount): - """Spawn new worker threads (not above self.max).""" - for i in xrange(amount): - if self.max > 0 and len(self._threads) >= self.max: - break - worker = WorkerThread(self.server) - worker.setName("CP WSGIServer " + worker.getName()) - self._threads.append(worker) - worker.start() - - def shrink(self, amount): - """Kill off worker threads (not below self.min).""" - # Grow/shrink the pool if necessary. - # Remove any dead threads from our list - for t in self._threads: - if not t.isAlive(): - self._threads.remove(t) - amount -= 1 - - if amount > 0: - for i in xrange(min(amount, len(self._threads) - self.min)): - # Put a number of shutdown requests on the queue equal - # to 'amount'. Once each of those is processed by a worker, - # that worker will terminate and be culled from our list - # in self.put. - self._queue.put(_SHUTDOWNREQUEST) - - def stop(self, timeout=5): - # Must shut down threads here so the code that calls - # this method can know when all threads are stopped. - for worker in self._threads: - self._queue.put(_SHUTDOWNREQUEST) - - # Don't join currentThread (when stop is called inside a request). - current = threading.currentThread() - while self._threads: - worker = self._threads.pop() - if worker is not current and worker.isAlive(): - try: - if timeout is None or timeout < 0: - worker.join() - else: - worker.join(timeout) - if worker.isAlive(): - # We exhausted the timeout. - # Forcibly shut down the socket. - c = worker.conn - if c and not c.rfile.closed: - if SSL and isinstance(c.socket, SSL.ConnectionType): - # pyOpenSSL.socket.shutdown takes no args - c.socket.shutdown() - else: - c.socket.shutdown(socket.SHUT_RD) - worker.join() - except (AssertionError, - # Ignore repeated Ctrl-C. - # See http://www.cherrypy.org/ticket/691. - KeyboardInterrupt), exc1: - pass - - - -class SSLConnection: - """A thread-safe wrapper for an SSL.Connection. - - *args: the arguments to create the wrapped SSL.Connection(*args). 
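    The methods generated below do nothing more than serialize every call
    on a single RLock. A sketch of the same idea without exec (an
    illustrative alternative, not a literal rewrite of this class)::

        import threading

        class Locked(object):
            def __init__(self, wrapped):
                self._wrapped = wrapped
                self._lock = threading.RLock()

            def __getattr__(self, name):
                method = getattr(self._wrapped, name)
                def call(*args, **kwargs):
                    self._lock.acquire()
                    try:
                        return method(*args, **kwargs)
                    finally:
                        self._lock.release()
                return call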
- """ - - def __init__(self, *args): - self._ssl_conn = SSL.Connection(*args) - self._lock = threading.RLock() - - for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', - 'renegotiate', 'bind', 'listen', 'connect', 'accept', - 'setblocking', 'fileno', 'shutdown', 'close', 'get_cipher_list', - 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', - 'makefile', 'get_app_data', 'set_app_data', 'state_string', - 'sock_shutdown', 'get_peer_certificate', 'want_read', - 'want_write', 'set_connect_state', 'set_accept_state', - 'connect_ex', 'sendall', 'settimeout'): - exec """def %s(self, *args): - self._lock.acquire() - try: - return self._ssl_conn.%s(*args) - finally: - self._lock.release() -""" % (f, f) - - -try: - import fcntl -except ImportError: - try: - from ctypes import windll, WinError - except ImportError: - def prevent_socket_inheritance(sock): - """Dummy function, since neither fcntl nor ctypes are available.""" - pass - else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (Windows).""" - if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): - raise WinError() -else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (POSIX).""" - fd = sock.fileno() - old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) - fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) - - -class CherryPyWSGIServer(object): - """An HTTP server for WSGI. - - bind_addr: The interface on which to listen for connections. - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string. - wsgi_app: the WSGI 'application callable'; multiple WSGI applications - may be passed as (path_prefix, app) pairs. - numthreads: the number of worker threads to create (default 10). - server_name: the string to set for WSGI's SERVER_NAME environ entry. - Defaults to socket.gethostname(). - max: the maximum number of queued requests (defaults to -1 = no limit). - request_queue_size: the 'backlog' argument to socket.listen(); - specifies the maximum number of queued connections (default 5). - timeout: the timeout in seconds for accepted connections (default 10). - - nodelay: if True (the default since 3.1), sets the TCP_NODELAY socket - option. - - protocol: the version string to write in the Status-Line of all - HTTP responses. For example, "HTTP/1.1" (the default). This - also limits the supported features used in the response. - - - SSL/HTTPS - --------- - The OpenSSL module must be importable for SSL functionality. - You can obtain it from http://pyopenssl.sourceforge.net/ - - ssl_certificate: the filename of the server SSL certificate. - ssl_privatekey: the filename of the server's private key file. - - If either of these is None (both are None by default), this server - will not use SSL. If both are given and are valid, they will be read - on server start and used in the SSL context for the listening socket. 
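    A minimal HTTPS sketch (my_wsgi_app is hypothetical; following the
    scaffold's site.conf, one PEM file can hold both certificate and key)::

        server = CherryPyWSGIServer(('0.0.0.0', 8443), my_wsgi_app)
        server.ssl_certificate = 'test.pem'
        server.ssl_private_key = 'test.pem'
        try:
            server.start()
        except KeyboardInterrupt:
            server.stop()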
- """ - - protocol = "HTTP/1.1" - _bind_addr = "127.0.0.1" - version = "CherryPy/3.1.2" - ready = False - _interrupt = None - - nodelay = True - - ConnectionClass = HTTPConnection - environ = {} - - # Paths to certificate and private key files - ssl_certificate = None - ssl_private_key = None - - def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, - max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): - self.requests = ThreadPool(self, min=numthreads or 1, max=max) - - if callable(wsgi_app): - # We've been handed a single wsgi_app, in CP-2.1 style. - # Assume it's mounted at "". - self.wsgi_app = wsgi_app - else: - # We've been handed a list of (path_prefix, wsgi_app) tuples, - # so that the server can call different wsgi_apps, and also - # correctly set SCRIPT_NAME. - warnings.warn("The ability to pass multiple apps is deprecated " - "and will be removed in 3.2. You should explicitly " - "include a WSGIPathInfoDispatcher instead.", - DeprecationWarning) - self.wsgi_app = WSGIPathInfoDispatcher(wsgi_app) - - self.bind_addr = bind_addr - if not server_name: - server_name = socket.gethostname() - self.server_name = server_name - self.request_queue_size = request_queue_size - - self.timeout = timeout - self.shutdown_timeout = shutdown_timeout - - def _get_numthreads(self): - return self.requests.min - def _set_numthreads(self, value): - self.requests.min = value - numthreads = property(_get_numthreads, _set_numthreads) - - def __str__(self): - return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, - self.bind_addr) - - def _get_bind_addr(self): - return self._bind_addr - def _set_bind_addr(self, value): - if isinstance(value, tuple) and value[0] in ('', None): - # Despite the socket module docs, using '' does not - # allow AI_PASSIVE to work. Passing None instead - # returns '0.0.0.0' like we want. In other words: - # host AI_PASSIVE result - # '' Y 192.168.x.y - # '' N 192.168.x.y - # None Y 0.0.0.0 - # None N 127.0.0.1 - # But since you can get the same effect with an explicit - # '0.0.0.0', we deny both the empty string and None as values. - raise ValueError("Host values of '' or None are not allowed. " - "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " - "to listen on all active interfaces.") - self._bind_addr = value - bind_addr = property(_get_bind_addr, _set_bind_addr, - doc="""The interface on which to listen for connections. - - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string.""") - - def start(self): - """Run the server forever.""" - # We don't have to trap KeyboardInterrupt or SystemExit here, - # because cherrpy.server already does so, calling self.stop() for us. - # If you're using this server with another framework, you should - # trap those exceptions in whatever code block calls start(). - self._interrupt = None - - # Select the appropriate socket - if isinstance(self.bind_addr, basestring): - # AF_UNIX socket - - # So we can reuse the socket... - try: os.unlink(self.bind_addr) - except: pass - - # So everyone can access the socket... 
- try: os.chmod(self.bind_addr, 0777) - except: pass - - info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] - else: - # AF_INET or AF_INET6 socket - # Get the correct address family for our host (allows IPv6 addresses) - host, port = self.bind_addr - try: - info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM, 0, socket.AI_PASSIVE) - except socket.gaierror: - # Probably a DNS issue. Assume IPv4. - info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", self.bind_addr)] - - self.socket = None - msg = "No socket could be created" - for res in info: - af, socktype, proto, canonname, sa = res - try: - self.bind(af, socktype, proto) - except socket.error, msg: - if self.socket: - self.socket.close() - self.socket = None - continue - break - if not self.socket: - raise socket.error, msg - - # Timeout so KeyboardInterrupt can be caught on Win32 - self.socket.settimeout(1) - self.socket.listen(self.request_queue_size) - - # Create worker threads - self.requests.start() - - self.ready = True - while self.ready: - self.tick() - if self.interrupt: - while self.interrupt is True: - # Wait for self.stop() to complete. See _set_interrupt. - time.sleep(0.1) - if self.interrupt: - raise self.interrupt - - def bind(self, family, type, proto=0): - """Create (or recreate) the actual socket object.""" - self.socket = socket.socket(family, type, proto) - prevent_socket_inheritance(self.socket) - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - if self.nodelay: - self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - if self.ssl_certificate and self.ssl_private_key: - if SSL is None: - raise ImportError("You must install pyOpenSSL to use HTTPS.") - - # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 - ctx = SSL.Context(SSL.SSLv23_METHOD) - ctx.use_privatekey_file(self.ssl_private_key) - ctx.use_certificate_file(self.ssl_certificate) - self.socket = SSLConnection(ctx, self.socket) - self.populate_ssl_environ() - - # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), - # activate dual-stack. See http://www.cherrypy.org/ticket/871. - if (not isinstance(self.bind_addr, basestring) - and self.bind_addr[0] == '::' and family == socket.AF_INET6): - try: - self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) - except (AttributeError, socket.error): - # Apparently, the socket option is not available in - # this machine's TCP stack - pass - - self.socket.bind(self.bind_addr) - - def tick(self): - """Accept a new connection and put it on the Queue.""" - try: - s, addr = self.socket.accept() - prevent_socket_inheritance(s) - if not self.ready: - return - if hasattr(s, 'settimeout'): - s.settimeout(self.timeout) - - environ = self.environ.copy() - # SERVER_SOFTWARE is common for IIS. It's also helpful for - # us to pass a default value for the "Server" response header. - if environ.get("SERVER_SOFTWARE") is None: - environ["SERVER_SOFTWARE"] = "%s WSGI Server" % self.version - # set a non-standard environ entry so the WSGI app can know what - # the *real* server protocol is (and what features to support). - # See http://www.faqs.org/rfcs/rfc2145.html. - environ["ACTUAL_SERVER_PROTOCOL"] = self.protocol - environ["SERVER_NAME"] = self.server_name - - if isinstance(self.bind_addr, basestring): - # AF_UNIX. This isn't really allowed by WSGI, which doesn't - # address unix domain sockets. But it's better than nothing. 
- environ["SERVER_PORT"] = "" - else: - environ["SERVER_PORT"] = str(self.bind_addr[1]) - # optional values - # Until we do DNS lookups, omit REMOTE_HOST - environ["REMOTE_ADDR"] = addr[0] - environ["REMOTE_PORT"] = str(addr[1]) - - conn = self.ConnectionClass(s, self.wsgi_app, environ) - self.requests.put(conn) - except socket.timeout: - # The only reason for the timeout in start() is so we can - # notice keyboard interrupts on Win32, which don't interrupt - # accept() by default - return - except socket.error, x: - if x.args[0] in socket_error_eintr: - # I *think* this is right. EINTR should occur when a signal - # is received during the accept() call; all docs say retry - # the call, and I *think* I'm reading it right that Python - # will then go ahead and poll for and handle the signal - # elsewhere. See http://www.cherrypy.org/ticket/707. - return - if x.args[0] in socket_errors_nonblocking: - # Just try again. See http://www.cherrypy.org/ticket/479. - return - if x.args[0] in socket_errors_to_ignore: - # Our socket was closed. - # See http://www.cherrypy.org/ticket/686. - return - raise - - def _get_interrupt(self): - return self._interrupt - def _set_interrupt(self, interrupt): - self._interrupt = True - self.stop() - self._interrupt = interrupt - interrupt = property(_get_interrupt, _set_interrupt, - doc="Set this to an Exception instance to " - "interrupt the server.") - - def stop(self): - """Gracefully shutdown a server that is serving forever.""" - self.ready = False - - sock = getattr(self, "socket", None) - if sock: - if not isinstance(self.bind_addr, basestring): - # Touch our own socket to make accept() return immediately. - try: - host, port = sock.getsockname()[:2] - except socket.error, x: - if x.args[0] not in socket_errors_to_ignore: - raise - else: - # Note that we're explicitly NOT using AI_PASSIVE, - # here, because we want an actual IP to touch. - # localhost won't work if we've bound to a public IP, - # but it will if we bound to '0.0.0.0' (INADDR_ANY). - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - s = None - try: - s = socket.socket(af, socktype, proto) - # See http://groups.google.com/group/cherrypy-users/ - # browse_frm/thread/bbfe5eb39c904fe0 - s.settimeout(1.0) - s.connect((host, port)) - s.close() - except socket.error: - if s: - s.close() - if hasattr(sock, "close"): - sock.close() - self.socket = None - - self.requests.stop(self.shutdown_timeout) - - def populate_ssl_environ(self): - """Create WSGI environ entries to be merged into each request.""" - cert = open(self.ssl_certificate, 'rb').read() - cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) - ssl_environ = { - "wsgi.url_scheme": "https", - "HTTPS": "on", - # pyOpenSSL doesn't provide access to any of these AFAICT -## 'SSL_PROTOCOL': 'SSLv2', -## SSL_CIPHER string The cipher specification name -## SSL_VERSION_INTERFACE string The mod_ssl program version -## SSL_VERSION_LIBRARY string The OpenSSL program version - } - - # Server certificate attributes - ssl_environ.update({ - 'SSL_SERVER_M_VERSION': cert.get_version(), - 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), -## 'SSL_SERVER_V_START': Validity of server's certificate (start time), -## 'SSL_SERVER_V_END': Validity of server's certificate (end time), - }) - - for prefix, dn in [("I", cert.get_issuer()), - ("S", cert.get_subject())]: - # X509Name objects don't seem to have a way to get the - # complete DN string. 
Use str() and slice it instead, - # because str(dn) == "" - dnstr = str(dn)[18:-2] - - wsgikey = 'SSL_SERVER_%s_DN' % prefix - ssl_environ[wsgikey] = dnstr - - # The DN should be of the form: /k1=v1/k2=v2, but we must allow - # for any value to contain slashes itself (in a URL). - while dnstr: - pos = dnstr.rfind("=") - dnstr, value = dnstr[:pos], dnstr[pos + 1:] - pos = dnstr.rfind("/") - dnstr, key = dnstr[:pos], dnstr[pos + 1:] - if key and value: - wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) - ssl_environ[wsgikey] = value - - self.environ.update(ssl_environ) - + # Le sigh. Boo for backward-incompatible syntax. + exec('from .wsgiserver3 import *') diff --git a/src/cherrypy/wsgiserver/ssl_builtin.py b/src/cherrypy/wsgiserver/ssl_builtin.py new file mode 100644 index 0000000000..03bf05deed --- /dev/null +++ b/src/cherrypy/wsgiserver/ssl_builtin.py @@ -0,0 +1,91 @@ +"""A library for integrating Python's builtin ``ssl`` library with CherryPy. + +The ssl module must be importable for SSL functionality. + +To use this module, set ``CherryPyWSGIServer.ssl_adapter`` to an instance of +``BuiltinSSLAdapter``. +""" + +try: + import ssl +except ImportError: + ssl = None + +try: + from _pyio import DEFAULT_BUFFER_SIZE +except ImportError: + try: + from io import DEFAULT_BUFFER_SIZE + except ImportError: + DEFAULT_BUFFER_SIZE = -1 + +import sys + +from cherrypy import wsgiserver + + +class BuiltinSSLAdapter(wsgiserver.SSLAdapter): + """A wrapper for integrating Python's builtin ssl module with CherryPy.""" + + certificate = None + """The filename of the server SSL certificate.""" + + private_key = None + """The filename of the server's private key file.""" + + def __init__(self, certificate, private_key, certificate_chain=None): + if ssl is None: + raise ImportError("You must install the ssl module to use HTTPS.") + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def bind(self, sock): + """Wrap and return the given socket.""" + return sock + + def wrap(self, sock): + """Wrap and return the given socket, plus WSGI environ entries.""" + try: + s = ssl.wrap_socket(sock, do_handshake_on_connect=True, + server_side=True, certfile=self.certificate, + keyfile=self.private_key, ssl_version=ssl.PROTOCOL_SSLv23) + except ssl.SSLError: + e = sys.exc_info()[1] + if e.errno == ssl.SSL_ERROR_EOF: + # This is almost certainly due to the cherrypy engine + # 'pinging' the socket to assert it's connectable; + # the 'ping' isn't SSL. + return None, {} + elif e.errno == ssl.SSL_ERROR_SSL: + if e.args[1].endswith('http request'): + # The client is speaking HTTP to an HTTPS server. + raise wsgiserver.NoSSLError + elif e.args[1].endswith('unknown protocol'): + # The client is speaking some non-HTTP protocol. + # Drop the conn. 
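+                    # Returning (None, {}) hands back no socket and an empty
+                    # environ, which tells the server's accept loop to abandon
+                    # this connection without attempting an HTTP response.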
+ return None, {} + raise + return s, self.get_environ(s) + + # TODO: fill this out more with mod ssl env + def get_environ(self, sock): + """Create WSGI environ entries to be merged into each request.""" + cipher = sock.cipher() + ssl_environ = { + "wsgi.url_scheme": "https", + "HTTPS": "on", + 'SSL_PROTOCOL': cipher[1], + 'SSL_CIPHER': cipher[0] +## SSL_VERSION_INTERFACE string The mod_ssl program version +## SSL_VERSION_LIBRARY string The OpenSSL program version + } + return ssl_environ + + if sys.version_info >= (3, 0): + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_makefile(sock, mode, bufsize) + else: + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_fileobject(sock, mode, bufsize) + diff --git a/src/cherrypy/wsgiserver/ssl_pyopenssl.py b/src/cherrypy/wsgiserver/ssl_pyopenssl.py new file mode 100644 index 0000000000..f3d9bf54b8 --- /dev/null +++ b/src/cherrypy/wsgiserver/ssl_pyopenssl.py @@ -0,0 +1,256 @@ +"""A library for integrating pyOpenSSL with CherryPy. + +The OpenSSL module must be importable for SSL functionality. +You can obtain it from http://pyopenssl.sourceforge.net/ + +To use this module, set CherryPyWSGIServer.ssl_adapter to an instance of +SSLAdapter. There are two ways to use SSL: + +Method One +---------- + + * ``ssl_adapter.context``: an instance of SSL.Context. + +If this is not None, it is assumed to be an SSL.Context instance, +and will be passed to SSL.Connection on bind(). The developer is +responsible for forming a valid Context object. This approach is +to be preferred for more flexibility, e.g. if the cert and key are +streams instead of files, or need decryption, or SSL.SSLv3_METHOD +is desired instead of the default SSL.SSLv23_METHOD, etc. Consult +the pyOpenSSL documentation for complete options. + +Method Two (shortcut) +--------------------- + + * ``ssl_adapter.certificate``: the filename of the server SSL certificate. + * ``ssl_adapter.private_key``: the filename of the server's private key file. + +Both are None by default. If ssl_adapter.context is None, but .private_key +and .certificate are both given and valid, they will be read, and the +context will be automatically created from them. +""" + +import socket +import threading +import time + +from cherrypy import wsgiserver + +try: + from OpenSSL import SSL + from OpenSSL import crypto +except ImportError: + SSL = None + + +class SSL_fileobject(wsgiserver.CP_fileobject): + """SSL file object attached to a socket object.""" + + ssl_timeout = 3 + ssl_retry = .01 + + def _safe_call(self, is_reader, call, *args, **kwargs): + """Wrap the given call with SSL error-trapping. + + is_reader: if False EOF errors will be raised. If True, EOF errors + will return "" (to emulate normal sockets). + """ + start = time.time() + while True: + try: + return call(*args, **kwargs) + except SSL.WantReadError: + # Sleep and try again. This is dangerous, because it means + # the rest of the stack has no way of differentiating + # between a "new handshake" error and "client dropped". + # Note this isn't an endless loop: there's a timeout below. 
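+                # ssl_retry is 0.01s per attempt; the check at the bottom of
+                # the loop raises socket.timeout once more than ssl_timeout
+                # (3 seconds by default) has elapsed.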
+ time.sleep(self.ssl_retry) + except SSL.WantWriteError: + time.sleep(self.ssl_retry) + except SSL.SysCallError, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + errnum = e.args[0] + if is_reader and errnum in wsgiserver.socket_errors_to_ignore: + return "" + raise socket.error(errnum) + except SSL.Error, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + thirdarg = None + try: + thirdarg = e.args[0][0][2] + except IndexError: + pass + + if thirdarg == 'http request': + # The client is talking HTTP to an HTTPS server. + raise wsgiserver.NoSSLError() + + raise wsgiserver.FatalSSLAlert(*e.args) + except: + raise + + if time.time() - start > self.ssl_timeout: + raise socket.timeout("timed out") + + def recv(self, *args, **kwargs): + buf = [] + r = super(SSL_fileobject, self).recv + while True: + data = self._safe_call(True, r, *args, **kwargs) + buf.append(data) + p = self._sock.pending() + if not p: + return "".join(buf) + + def sendall(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).sendall, + *args, **kwargs) + + def send(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).send, + *args, **kwargs) + + +class SSLConnection: + """A thread-safe wrapper for an SSL.Connection. + + ``*args``: the arguments to create the wrapped ``SSL.Connection(*args)``. + """ + + def __init__(self, *args): + self._ssl_conn = SSL.Connection(*args) + self._lock = threading.RLock() + + for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', + 'renegotiate', 'bind', 'listen', 'connect', 'accept', + 'setblocking', 'fileno', 'close', 'get_cipher_list', + 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', + 'makefile', 'get_app_data', 'set_app_data', 'state_string', + 'sock_shutdown', 'get_peer_certificate', 'want_read', + 'want_write', 'set_connect_state', 'set_accept_state', + 'connect_ex', 'sendall', 'settimeout', 'gettimeout'): + exec("""def %s(self, *args): + self._lock.acquire() + try: + return self._ssl_conn.%s(*args) + finally: + self._lock.release() +""" % (f, f)) + + def shutdown(self, *args): + self._lock.acquire() + try: + # pyOpenSSL.socket.shutdown takes no args + return self._ssl_conn.shutdown() + finally: + self._lock.release() + + +class pyOpenSSLAdapter(wsgiserver.SSLAdapter): + """A wrapper for integrating pyOpenSSL with CherryPy.""" + + context = None + """An instance of SSL.Context.""" + + certificate = None + """The filename of the server SSL certificate.""" + + private_key = None + """The filename of the server's private key file.""" + + certificate_chain = None + """Optional. The filename of CA's intermediate certificate bundle. 
+ + This is needed for cheaper "chained root" SSL certificates, and should be + left as None if not required.""" + + def __init__(self, certificate, private_key, certificate_chain=None): + if SSL is None: + raise ImportError("You must install pyOpenSSL to use HTTPS.") + + self.context = None + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + self._environ = None + + def bind(self, sock): + """Wrap and return the given socket.""" + if self.context is None: + self.context = self.get_context() + conn = SSLConnection(self.context, sock) + self._environ = self.get_environ() + return conn + + def wrap(self, sock): + """Wrap and return the given socket, plus WSGI environ entries.""" + return sock, self._environ.copy() + + def get_context(self): + """Return an SSL.Context from self attributes.""" + # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 + c = SSL.Context(SSL.SSLv23_METHOD) + c.use_privatekey_file(self.private_key) + if self.certificate_chain: + c.load_verify_locations(self.certificate_chain) + c.use_certificate_file(self.certificate) + return c + + def get_environ(self): + """Return WSGI environ entries to be merged into each request.""" + ssl_environ = { + "HTTPS": "on", + # pyOpenSSL doesn't provide access to any of these AFAICT +## 'SSL_PROTOCOL': 'SSLv2', +## SSL_CIPHER string The cipher specification name +## SSL_VERSION_INTERFACE string The mod_ssl program version +## SSL_VERSION_LIBRARY string The OpenSSL program version + } + + if self.certificate: + # Server certificate attributes + cert = open(self.certificate, 'rb').read() + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) + ssl_environ.update({ + 'SSL_SERVER_M_VERSION': cert.get_version(), + 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), +## 'SSL_SERVER_V_START': Validity of server's certificate (start time), +## 'SSL_SERVER_V_END': Validity of server's certificate (end time), + }) + + for prefix, dn in [("I", cert.get_issuer()), + ("S", cert.get_subject())]: + # X509Name objects don't seem to have a way to get the + # complete DN string. Use str() and slice it instead, + # because str(dn) == "" + dnstr = str(dn)[18:-2] + + wsgikey = 'SSL_SERVER_%s_DN' % prefix + ssl_environ[wsgikey] = dnstr + + # The DN should be of the form: /k1=v1/k2=v2, but we must allow + # for any value to contain slashes itself (in a URL). + while dnstr: + pos = dnstr.rfind("=") + dnstr, value = dnstr[:pos], dnstr[pos + 1:] + pos = dnstr.rfind("/") + dnstr, key = dnstr[:pos], dnstr[pos + 1:] + if key and value: + wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) + ssl_environ[wsgikey] = value + + return ssl_environ + + def makefile(self, sock, mode='r', bufsize=-1): + if SSL and isinstance(sock, SSL.ConnectionType): + timeout = sock.gettimeout() + f = SSL_fileobject(sock, mode, bufsize) + f.ssl_timeout = timeout + return f + else: + return wsgiserver.CP_fileobject(sock, mode, bufsize) + diff --git a/src/cherrypy/wsgiserver/wsgiserver2.py b/src/cherrypy/wsgiserver/wsgiserver2.py new file mode 100644 index 0000000000..b6bd499718 --- /dev/null +++ b/src/cherrypy/wsgiserver/wsgiserver2.py @@ -0,0 +1,2322 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. 
+ +Simplest example on how to use this module directly +(without using CherryPy's application machinery):: + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + server.start() + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher:: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. + +This won't call the CherryPy engine (application side) at all, only the +HTTP server, which is independent from the rest of CherryPy. Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue:: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop:: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" + req.rfile.readline() + read_headers(req.rfile, req.inheaders) + req.respond() + -> response = app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'CP_fileobject', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] + +import os +try: + import queue +except: + import Queue as queue +import re +import rfc822 +import socket +import sys +if 'win' in sys.platform and not hasattr(socket, 'IPPROTO_IPV6'): + socket.IPPROTO_IPV6 = 41 +try: + import cStringIO as StringIO +except ImportError: + import StringIO +DEFAULT_BUFFER_SIZE = -1 + +_fileobject_uses_str_type = isinstance(socket._fileobject(None)._rbuf, basestring) + +import threading +import time +import traceback +def format_exc(limit=None): + """Like print_exc() but return a string. 
Backport for Python 2.3.""" + try: + etype, value, tb = sys.exc_info() + return ''.join(traceback.format_exception(etype, value, tb, limit)) + finally: + etype = value = tb = None + + +from urllib import unquote +from urlparse import urlparse +import warnings + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. + """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", + ) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate']] + + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. 
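+
+    For illustration, repeated headers named in ``comma_separated_headers``
+    are folded into a single comma-joined value::
+
+        import StringIO
+        raw = CRLF.join(["Host: example.com",
+                         "Accept: text/html",
+                         "Accept: text/plain",
+                         "", ""])
+        hdict = read_headers(StringIO.StringIO(raw))
+        # hdict == {'Host': 'example.com', 'Accept': 'text/html, text/plain'}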
+ """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = ", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + +class SizeCheckWrapper(object): + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < 256 or data[-1:] == "\n": + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + """Wraps a file-like object, returning an empty string 
when exhausted.
+
+    This class is intended to provide a conforming wsgi.input value for
+    request entities that have been encoded with the 'chunked' transfer
+    encoding.
+    """
+
+    def __init__(self, rfile, maxlen, bufsize=8192):
+        self.rfile = rfile
+        self.maxlen = maxlen
+        self.bytes_read = 0
+        self.buffer = EMPTY
+        self.bufsize = bufsize
+        self.closed = False
+
+    def _fetch(self):
+        if self.closed:
+            return
+
+        line = self.rfile.readline()
+        self.bytes_read += len(line)
+
+        if self.maxlen and self.bytes_read > self.maxlen:
+            raise MaxSizeExceeded("Request Entity Too Large", self.maxlen)
+
+        line = line.strip().split(SEMICOLON, 1)
+
+        try:
+            chunk_size = line.pop(0)
+            chunk_size = int(chunk_size, 16)
+        except ValueError:
+            raise ValueError("Bad chunked transfer size: " + repr(chunk_size))
+
+        if chunk_size <= 0:
+            self.closed = True
+            return
+
+##            if line: chunk_extension = line[0]
+
+        if self.maxlen and self.bytes_read + chunk_size > self.maxlen:
+            raise IOError("Request Entity Too Large")
+
+        chunk = self.rfile.read(chunk_size)
+        self.bytes_read += len(chunk)
+        self.buffer += chunk
+
+        crlf = self.rfile.read(2)
+        if crlf != CRLF:
+            raise ValueError(
+                "Bad chunked transfer coding (expected '\\r\\n', "
+                "got " + repr(crlf) + ")")
+
+    def read(self, size=None):
+        data = EMPTY
+        while True:
+            if size and len(data) >= size:
+                return data
+
+            if not self.buffer:
+                self._fetch()
+                if not self.buffer:
+                    # EOF
+                    return data
+
+            if size:
+                remaining = size - len(data)
+                data += self.buffer[:remaining]
+                self.buffer = self.buffer[remaining:]
+            else:
+                data += self.buffer
+                # Consume the buffer so the next pass calls _fetch() again
+                # instead of appending the same bytes forever.
+                self.buffer = EMPTY
+
+    def readline(self, size=None):
+        data = EMPTY
+        while True:
+            if size and len(data) >= size:
+                return data
+
+            if not self.buffer:
+                self._fetch()
+                if not self.buffer:
+                    # EOF
+                    return data
+
+            newline_pos = self.buffer.find(LF)
+            if size:
+                if newline_pos == -1:
+                    remaining = size - len(data)
+                    data += self.buffer[:remaining]
+                    self.buffer = self.buffer[remaining:]
+                else:
+                    remaining = min(size - len(data), newline_pos + 1)
+                    data += self.buffer[:remaining]
+                    self.buffer = self.buffer[remaining:]
+                    if data.endswith(LF):
+                        return data
+            else:
+                if newline_pos == -1:
+                    data += self.buffer
+                    self.buffer = EMPTY
+                else:
+                    # Take through the newline, consume it from the buffer,
+                    # and return the complete line.
+                    data += self.buffer[:newline_pos + 1]
+                    self.buffer = self.buffer[newline_pos + 1:]
+                    return data
+
+    def readlines(self, sizehint=0):
+        # Shamelessly stolen from StringIO
+        total = 0
+        lines = []
+        line = self.readline(sizehint)
+        while line:
+            lines.append(line)
+            total += len(line)
+            if 0 < sizehint <= total:
+                break
+            line = self.readline(sizehint)
+        return lines
+
+    def read_trailer_lines(self):
+        if not self.closed:
+            raise ValueError(
+                "Cannot read trailers until the request body has been read.")
+
+        while True:
+            line = self.rfile.readline()
+            if not line:
+                # No more data--illegal end of headers
+                raise ValueError("Illegal end of headers.")
+
+            self.bytes_read += len(line)
+            if self.maxlen and self.bytes_read > self.maxlen:
+                raise IOError("Request Entity Too Large")
+
+            if line == CRLF:
+                # Normal end of headers
+                break
+            if not line.endswith(CRLF):
+                raise ValueError("HTTP requires CRLF terminators")
+
+            yield line
+
+    def close(self):
+        self.rfile.close()
+
+    def __iter__(self):
+        # Yield one line at a time until the chunked stream is exhausted.
+        line = self.readline()
+        while line:
+            yield line
+            line = self.readline()
+
+
+class HTTPRequest(object):
+    """An HTTP Request (and response).
+
+    A single HTTP connection may consist of multiple request/response pairs.
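+
+    The owning HTTPConnection drives the lifecycle: it calls parse_request(),
+    only calls respond() once 'ready' is True, and afterwards consults
+    close_connection to decide whether to reuse the socket for another
+    request.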
+ """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server= server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response("414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response("413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. 
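+            # Read exactly one more line; if this one is also a bare CRLF it
+            # will fail the Request-Line parse below and draw a 400 response.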
+ request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response("400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + rp = int(req_protocol[5]), int(req_protocol[7]) + except (ValueError, IndexError): + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [unquote(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = "%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + sp = int(self.server.protocol[5]), int(self.server.protocol[7]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. 
Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get("Content-Length", 0)) > mrbs: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get("Connection", "") == "close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get("Connection", "") != "Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get("Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == "chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get("Expect", "") == "100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See http://www.cherrypy.org/ticket/951 + msg = self.server.protocol + " 100 Continue\r\n\r\n" + try: + self.conn.wfile.sendall(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + i = uri.find('://') + if i > 0 and QUESTION_MARK not in uri[:i]: + # An absoluteURI. + # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" 
query ]] + scheme, remainder = uri[:i].lower(), uri[i + 3:] + authority, path = remainder.split(FORWARD_SLASH, 1) + path = FORWARD_SLASH + path + return scheme, authority, path + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get("Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.sendall("0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [self.server.protocol + SPACE + + status + CRLF, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append("Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [hex(len(chunk))[2:], CRLF, chunk, CRLF] + self.conn.wfile.sendall(EMPTY.join(buf)) + else: + self.conn.wfile.sendall(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif "content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. + if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != 'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append(("Transfer-Encoding", "chunked")) + else: + # Closing the conn is the only way to determine len. 
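+                    # Without chunking (an HTTP/1.0 client, or a HEAD
+                    # request), closing the socket is the only way to
+                    # signal the end of the response body.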
+ self.close_connection = True + + if "connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append(("Connection", "close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append(("Connection", "Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if "date" not in hkeys: + self.outheaders.append(("Date", rfc822.formatdate())) + + if "server" not in hkeys: + self.outheaders.append(("Server", self.server.server_name)) + + buf = [self.server.protocol + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.sendall(EMPTY.join(buf)) + + +class NoSSLError(Exception): + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_fileobject(socket._fileobject): + """Faux file object attached to a socket object.""" + + def __init__(self, *args, **kwargs): + self.bytes_read = 0 + self.bytes_written = 0 + socket._fileobject.__init__(self, *args, **kwargs) + + def sendall(self, data): + """Sendall for non-blocking sockets.""" + while data: + try: + bytes_sent = self.send(data) + data = data[bytes_sent:] + except socket.error, e: + if e.args[0] not in socket_errors_nonblocking: + raise + + def send(self, data): + bytes_sent = self._sock.send(data) + self.bytes_written += bytes_sent + return bytes_sent + + def flush(self): + if self._wbuf: + buffer = "".join(self._wbuf) + self._wbuf = [] + self.sendall(buffer) + + def recv(self, size): + while True: + try: + data = self._sock.recv(size) + self.bytes_read += len(data) + return data + except socket.error, e: + if (e.args[0] not in socket_errors_nonblocking + and e.args[0] not in socket_error_eintr): + raise + + if not _fileobject_uses_str_type: + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very inefficient. + # We never leave read() with any leftover data from a new recv() call + # in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned by + # recv() minimizes memory usage and fragmentation that occurs when + # rbufsize is large compared to the typical return value of recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. 
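+                # Drain the socket to EOF, accumulating each recv() chunk
+                # into 'buf'.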
+ while True: + data = self.recv(rbufsize) + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + data = self.recv(left) + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + data = None + recv = self.recv + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + + buf.seek(0, 2) # seek end + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + data = self.recv(self._rbufsize) + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + self._rbuf = StringIO.StringIO() # reset _rbuf. we consume it via buf. + while True: + data = self.recv(self._rbufsize) + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when returning + # a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). 
+ return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + else: + def read(self, size=-1): + if size < 0: + # Read until EOF + buffers = [self._rbuf] + self._rbuf = "" + if self._rbufsize <= 1: + recv_size = self.default_bufsize + else: + recv_size = self._rbufsize + + while True: + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + return "".join(buffers) + else: + # Read until size bytes or EOF seen, whichever comes first + data = self._rbuf + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + left = size - buf_len + recv_size = max(self._rbufsize, left) + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + def readline(self, size=-1): + data = self._rbuf + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + assert data == "" + buffers = [] + while data != "\n": + data = self.recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + return "".join(buffers) + else: + # Read until size bytes or \n or EOF seen, whichever comes first + nl = data.find('\n', 0, size) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + left = size - buf_len + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + +class HTTPConnection(object): + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. + """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_fileobject): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. 
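+                # One loop iteration per request on this socket: parse, then
+                # respond, until either side asks for the connection to close.
+                # This loop is what realizes keep-alive and pipelining.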
+ req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if errnum == 'timed out' or errnum == 'The read operation timed out': + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See http://www.cherrypy.org/ticket/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_fileobject(self.socket._sock, "wb", self.wbufsize) + req.simple_response("400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel socket + # when you call socket.close(). We do so manually here because we + # want this server to send a FIN TCP segment immediately. Note this + # must be called *before* calling socket.close(), because the latter + # drops its reference to the kernel socket. + if hasattr(self.socket, '_sock'): + self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + """An object which equals and does math like the integer '0' but evals True.""" + def __add__(self, other): + return other + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + +class WorkerThread(threading.Thread): + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). 
+ """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ((self.start_time is None) and trueyzero or self.conn.requests_seen), + 'Bytes Read': lambda s: self.bytes_read + ((self.start_time is None) and trueyzero or self.conn.rfile.bytes_read), + 'Bytes Written': lambda s: self.bytes_written + ((self.start_time is None) and trueyzero or self.conn.wfile.bytes_written), + 'Work Time': lambda s: self.work_time + ((self.start_time is None) and trueyzero or time.time() - self.start_time), + 'Read Throughput': lambda s: s['Bytes Read'](s) / (s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / (s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue() + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + for i in range(amount): + if self.max > 0 and len(self._threads) >= self.max: + break + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + self._threads.append(worker) + worker.start() + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. 
+ # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + if amount > 0: + for i in range(min(amount, len(self._threads) - self.min)): + # Put a number of shutdown requests on the queue equal + # to 'amount'. Once each of those is processed by a worker, + # that worker will terminate and be culled from our list + # in self.put. + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See http://www.cherrypy.org/ticket/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + """Base class for SSL driver library adapters. + + Required methods: + + * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` + * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> socket file object`` + """ + + def __init__(self, certificate, private_key, certificate_chain=None): + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def wrap(self, sock): + raise NotImplemented + + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + raise NotImplemented + + +class HTTPServer(object): + """An HTTP server.""" + + _bind_addr = "127.0.0.1" + _interrupt = None + + gateway = None + """A Gateway instance.""" + + minthreads = None + """The minimum number of worker threads to create (default 10).""" + + maxthreads = None + """The maximum number of worker threads to create (default -1 = no limit).""" + + server_name = None + """The name of the server; defaults to socket.gethostname().""" + + protocol = "HTTP/1.1" + """The version string to write in the Status-Line of all HTTP responses. + + For example, "HTTP/1.1" is the default. 
This also limits the supported + features used in the response.""" + + request_queue_size = 5 + """The 'backlog' arg to socket.listen(); max queued connections (default 5).""" + + shutdown_timeout = 5 + """The total time, in seconds, to wait for worker threads to cleanly exit.""" + + timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + version = "CherryPy/3.2.2" + """A version string for the HTTPServer.""" + + software = None + """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. + + If None, this defaults to ``'%s Server' % self.version``.""" + + ready = False + """An internal flag which marks whether the socket is accepting connections.""" + + max_request_header_size = 0 + """The maximum size, in bytes, for request headers, or 0 for no limit.""" + + max_request_body_size = 0 + """The maximum size, in bytes, for request bodies, or 0 for no limit.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + ConnectionClass = HTTPConnection + """The class to use for handling HTTP connections.""" + + ssl_adapter = None + """An instance of SSLAdapter (or a subclass). + + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum([w['Requests'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Read'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Written'](w) for w + in s['Worker Threads'].values()], 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum([w['Work Time'](w) for w + in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. 
Passing None instead
+ # returns '0.0.0.0' like we want. In other words:
+ # host AI_PASSIVE result
+ # '' Y 192.168.x.y
+ # '' N 192.168.x.y
+ # None Y 0.0.0.0
+ # None N 127.0.0.1
+ # But since you can get the same effect with an explicit
+ # '0.0.0.0', we deny both the empty string and None as values.
+ raise ValueError("Host values of '' or None are not allowed. "
+ "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead "
+ "to listen on all active interfaces.")
+ self._bind_addr = value
+ bind_addr = property(_get_bind_addr, _set_bind_addr,
+ doc="""The interface on which to listen for connections.
+
+ For TCP sockets, a (host, port) tuple. Host values may be any IPv4
+ or IPv6 address, or any valid hostname. The string 'localhost' is a
+ synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6).
+ The string '0.0.0.0' is a special IPv4 entry meaning "any active
+ interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for
+ IPv6. The empty string or None are not allowed.
+
+ For UNIX sockets, supply the filename as a string.""")
+
+ def start(self):
+ """Run the server forever."""
+ # We don't have to trap KeyboardInterrupt or SystemExit here,
+ # because cherrypy.server already does so, calling self.stop() for us.
+ # If you're using this server with another framework, you should
+ # trap those exceptions in whatever code block calls start().
+ self._interrupt = None
+
+ if self.software is None:
+ self.software = "%s Server" % self.version
+
+ # SSL backward compatibility
+ if (self.ssl_adapter is None and
+ getattr(self, 'ssl_certificate', None) and
+ getattr(self, 'ssl_private_key', None)):
+ warnings.warn(
+ "SSL attributes are deprecated in CherryPy 3.2, and will "
+ "be removed in CherryPy 3.3. Use an ssl_adapter attribute "
+ "instead.",
+ DeprecationWarning
+ )
+ try:
+ from cherrypy.wsgiserver.ssl_pyopenssl import pyOpenSSLAdapter
+ except ImportError:
+ pass
+ else:
+ self.ssl_adapter = pyOpenSSLAdapter(
+ self.ssl_certificate, self.ssl_private_key,
+ getattr(self, 'ssl_certificate_chain', None))
+
+ # Select the appropriate socket
+ if isinstance(self.bind_addr, basestring):
+ # AF_UNIX socket
+
+ # So we can reuse the socket...
+ try: os.unlink(self.bind_addr)
+ except: pass
+
+ # So everyone can access the socket...
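+ # (511 decimal is 0777 octal, i.e. world-readable and -writable,
+ # so any local user may connect to the UNIX socket.)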
+ try: os.chmod(self.bind_addr, 511) # 0777 + except: pass + + info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, socket.AI_PASSIVE) + except socket.gaierror: + if ':' in self.bind_addr[0]: + info = [(socket.AF_INET6, socket.SOCK_STREAM, + 0, "", self.bind_addr + (0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, + 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error: + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error(msg) + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + self._start_time = time.time() + while self.ready: + try: + self.tick() + except (KeyboardInterrupt, SystemExit): + raise + except: + self.error_log("Error in HTTPServer.tick", level=logging.ERROR, + traceback=True) + + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def error_log(self, msg="", level=20, traceback=False): + # Override this in subclasses as desired + sys.stderr.write(msg + '\n') + sys.stderr.flush() + if traceback: + tblines = format_exc() + sys.stderr.write(tblines) + sys.stderr.flush() + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + prevent_socket_inheritance(self.socket) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay and not isinstance(self.bind_addr, str): + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if self.ssl_adapter is not None: + self.socket = self.ssl_adapter.bind(self.socket) + + # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), + # activate dual-stack. See http://www.cherrypy.org/ticket/871. 
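+ # With IPV6_V6ONLY cleared, the single '::' listener also accepts
+ # IPv4 clients, which appear as IPv4-mapped addresses such as
+ # '::ffff:192.0.2.1'.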
+ if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_fileobject + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + self.requests.put(conn) + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See http://www.cherrypy.org/ticket/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See http://www.cherrypy.org/ticket/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See http://www.cherrypy.org/ticket/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. 
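+ # accept() runs with the 1-second timeout set in start(), so
+ # without this nudge a shutdown could stall for up to a second;
+ # a throwaway client connection wakes the listener so it
+ # re-checks self.ready.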
+ try: + host, port = sock.getsockname()[:2] + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + # Changed to use error code and not message + # See http://www.cherrypy.org/ticket/860. + raise + else: + # Note that we're explicitly NOT using AI_PASSIVE, + # here, because we want an actual IP to touch. + # localhost won't work if we've bound to a public IP, + # but it will if we bound to '0.0.0.0' (INADDR_ANY). + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(1.0) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + if hasattr(sock, "close"): + sock.close() + self.socket = None + + self.requests.stop(self.shutdown_timeout) + + +class Gateway(object): + """A base class to interface HTTPServer with other systems, such as WSGI.""" + + def __init__(self, req): + self.req = req + + def respond(self): + """Process the current request. Must be overridden in a subclass.""" + raise NotImplemented + + +# These may either be wsgiserver.SSLAdapter subclasses or the string names +# of such classes (in which case they will be lazily loaded). +ssl_adapters = { + 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', + 'pyopenssl': 'cherrypy.wsgiserver.ssl_pyopenssl.pyOpenSSLAdapter', + } + +def get_ssl_adapter_class(name='pyopenssl'): + """Return an SSL adapter class for the given name.""" + adapter = ssl_adapters[name.lower()] + if isinstance(adapter, basestring): + last_dot = adapter.rfind(".") + attr_name = adapter[last_dot + 1:] + mod_path = adapter[:last_dot] + + try: + mod = sys.modules[mod_path] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(mod_path, globals(), locals(), ['']) + + # Let an AttributeError propagate outward. 
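+ # (The except clause below re-raises it with the module path in
+ # the message, which makes a misconfigured adapter name easier
+ # to diagnose.)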
+ try: + adapter = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + return adapter + +# -------------------------------- WSGI Stuff -------------------------------- # + + +class CherryPyWSGIServer(HTTPServer): + """A subclass of HTTPServer which calls a WSGI application.""" + + wsgi_version = (1, 0) + """The version of WSGI to produce.""" + + def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, + max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5): + self.requests = ThreadPool(self, min=numthreads or 1, max=max) + self.wsgi_app = wsgi_app + self.gateway = wsgi_gateways[self.wsgi_version] + + self.bind_addr = bind_addr + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.request_queue_size = request_queue_size + + self.timeout = timeout + self.shutdown_timeout = shutdown_timeout + self.clear_stats() + + def _get_numthreads(self): + return self.requests.min + def _set_numthreads(self, value): + self.requests.min = value + numthreads = property(_get_numthreads, _set_numthreads) + + +class WSGIGateway(Gateway): + """A base class to interface HTTPServer with WSGI.""" + + def __init__(self, req): + self.req = req + self.started_response = False + self.env = self.get_environ() + self.remaining_bytes_out = None + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + raise NotImplemented + + def respond(self): + """Process the current request.""" + response = self.req.server.wsgi_app(self.env, self.start_response) + try: + for chunk in response: + # "The start_response callable must not actually transmit + # the response headers. Instead, it must store them for the + # server or gateway to transmit only after the first + # iteration of the application return value that yields + # a NON-EMPTY string, or upon the application's first + # invocation of the write() callable." (PEP 333) + if chunk: + if isinstance(chunk, unicodestr): + chunk = chunk.encode('ISO-8859-1') + self.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + + def start_response(self, status, headers, exc_info = None): + """WSGI callable to begin the HTTP response.""" + # "The application may call start_response more than once, + # if and only if the exc_info argument is provided." + if self.started_response and not exc_info: + raise AssertionError("WSGI start_response called a second " + "time with no exc_info.") + self.started_response = True + + # "if exc_info is provided, and the HTTP headers have already been + # sent, start_response must raise an error, and should raise the + # exc_info tuple." + if self.req.sent_headers: + try: + raise exc_info[0], exc_info[1], exc_info[2] + finally: + exc_info = None + + self.req.status = status + for k, v in headers: + if not isinstance(k, str): + raise TypeError("WSGI response header key %r is not of type str." % k) + if not isinstance(v, str): + raise TypeError("WSGI response header value %r is not of type str." % v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.extend(headers) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). 
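+ If the application declared a Content-Length and produces more
+ bytes than that, a 500 is sent when the headers have not yet been
+ written; otherwise the chunk is truncated (so the client does not
+ hang) and ValueError is raised afterwards.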
+ """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response("500 Internal Server Error", + "The requested resource returned more bytes than the " + "declared Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path, + 'QUERY_STRING': req.qs, + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method, + 'REQUEST_URI': req.uri, + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol, + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme, + 'wsgi.version': (1, 0), + } + + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.iteritems(): + env["HTTP_" + k.upper().replace("-", "_")] = v + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys and values + in both Python 2 and Python 3. + """ + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env_10 = WSGIGateway_10.get_environ(self) + env = dict([(k.decode('ISO-8859-1'), v) for k, v in env_10.iteritems()]) + env[u'wsgi.version'] = ('u', 0) + + # Request-URI + env.setdefault(u'wsgi.url_encoding', u'utf-8') + try: + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + except UnicodeDecodeError: + # Fall back to latin 1 so apps can transcode if needed. 
+ env[u'wsgi.url_encoding'] = u'ISO-8859-1' + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + + for k, v in sorted(env.items()): + if isinstance(v, str) and k not in ('REQUEST_URI', 'wsgi.input'): + env[k] = v.decode('ISO-8859-1') + + return env + +wsgi_gateways = { + (1, 0): WSGIGateway_10, + ('u', 0): WSGIGateway_u0, +} + +class WSGIPathInfoDispatcher(object): + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = list(apps.items()) + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort(cmp=lambda x,y: cmp(len(x[0]), len(y[0]))) + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". + self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] + diff --git a/src/cherrypy/wsgiserver/wsgiserver3.py b/src/cherrypy/wsgiserver/wsgiserver3.py new file mode 100644 index 0000000000..62db5ffd3b --- /dev/null +++ b/src/cherrypy/wsgiserver/wsgiserver3.py @@ -0,0 +1,2040 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. + +Simplest example on how to use this module directly +(without using CherryPy's application machinery):: + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + server.start() + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher:: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. + +This won't call the CherryPy engine (application side) at all, only the +HTTP server, which is independent from the rest of CherryPy. Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue:: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop:: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. 
"GET /page HTTP/1.1" + req.rfile.readline() + read_headers(req.rfile, req.inheaders) + req.respond() + -> response = app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'CP_makefile', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] + +import os +try: + import queue +except: + import Queue as queue +import re +import email.utils +import socket +import sys +if 'win' in sys.platform and not hasattr(socket, 'IPPROTO_IPV6'): + socket.IPPROTO_IPV6 = 41 +if sys.version_info < (3,1): + import io +else: + import _pyio as io +DEFAULT_BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE + +import threading +import time +from traceback import format_exc +from urllib.parse import unquote +from urllib.parse import urlparse +from urllib.parse import scheme_chars +import warnings + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given encoding.""" + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. 
+ """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", + ) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate']] + + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. + """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = b", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + +class SizeCheckWrapper(object): + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! 
+ res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See http://www.cherrypy.org/ticket/421 + if len(data) < 256 or data[-1:] == "\n": + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + """Wraps a file-like object, returning an empty string when exhausted. + + This class is intended to provide a conforming wsgi.input value for + request entities that have been encoded with the 'chunked' transfer + encoding. 
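+ Each chunk on the wire is preceded by its size in hex and followed
+ by CRLF, and a zero-size chunk ends the body; a body of 'hello'
+ arrives as '5\\r\\nhello\\r\\n0\\r\\n\\r\\n' (trailers omitted).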
+ """ + + def __init__(self, rfile, maxlen, bufsize=8192): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + self.buffer = EMPTY + self.bufsize = bufsize + self.closed = False + + def _fetch(self): + if self.closed: + return + + line = self.rfile.readline() + self.bytes_read += len(line) + + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) + + line = line.strip().split(SEMICOLON, 1) + + try: + chunk_size = line.pop(0) + chunk_size = int(chunk_size, 16) + except ValueError: + raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) + + if chunk_size <= 0: + self.closed = True + return + +## if line: chunk_extension = line[0] + + if self.maxlen and self.bytes_read + chunk_size > self.maxlen: + raise IOError("Request Entity Too Large") + + chunk = self.rfile.read(chunk_size) + self.bytes_read += len(chunk) + self.buffer += chunk + + crlf = self.rfile.read(2) + if crlf != CRLF: + raise ValueError( + "Bad chunked transfer coding (expected '\\r\\n', " + "got " + repr(crlf) + ")") + + def read(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + if size: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + data += self.buffer + + def readline(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + newline_pos = self.buffer.find(LF) + if size: + if newline_pos == -1: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + remaining = min(size - len(data), newline_pos) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + if newline_pos == -1: + data += self.buffer + else: + data += self.buffer[:newline_pos] + self.buffer = self.buffer[newline_pos:] + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def read_trailer_lines(self): + if not self.closed: + raise ValueError( + "Cannot read trailers until the request body has been read.") + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + self.bytes_read += len(line) + if self.maxlen and self.bytes_read > self.maxlen: + raise IOError("Request Entity Too Large") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + yield line + + def close(self): + self.rfile.close() + + def __iter__(self): + # Shamelessly stolen from StringIO + total = 0 + line = self.readline(sizehint) + while line: + yield line + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + + +class HTTPRequest(object): + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. 
+ """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server= server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response("414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response("413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. 
+ request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response("400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + # The [x:y] slicing is necessary for byte strings to avoid getting ord's + rp = int(req_protocol[5:6]), int(req_protocol[7:8]) + except ValueError: + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [self.unquote_bytes(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = b"%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + # The [x:y] slicing is necessary for byte strings to avoid getting ord's + sp = int(self.server.protocol[5:6]), int(self.server.protocol[7:8]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. 
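Also sets self.close_connection from the Connection header and
self.chunked_read from any Transfer-Encoding header, and answers an
Expect: 100-continue request immediately.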
Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get(b"Content-Length", 0)) > mrbs: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get(b"Connection", b"") == b"close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get(b"Connection", b"") != b"Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get(b"Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(b",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == b"chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get(b"Expect", b"") == b"100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See http://www.cherrypy.org/ticket/951 + msg = self.server.protocol.encode('ascii') + b" 100 Continue\r\n\r\n" + try: + self.conn.wfile.write(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + scheme, sep, remainder = uri.partition(b'://') + if sep and QUESTION_MARK not in scheme: + # An absoluteURI. 
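+ # e.g. b'http://example.com/page?x=1'
+ # -> (b'http', b'example.com', b'/page?x=1')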
+ # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] + authority, path_a, path_b = remainder.partition(FORWARD_SLASH) + return scheme.lower(), authority, path_a+path_b + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def unquote_bytes(self, path): + """takes quoted string and unquotes % encoded values""" + res = path.split(b'%') + + for i in range(1, len(res)): + item = res[i] + try: + res[i] = bytes([int(item[:2], 16)]) + item[2:] + except ValueError: + raise + return b''.join(res) + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get(b"Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response("413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.write(b"0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [bytes(self.server.protocol, "ascii") + SPACE + + bytes(status, "ISO-8859-1") + CRLF, + bytes("Content-Length: %s\r\n" % len(msg), "ISO-8859-1"), + b"Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append(b"Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.write(b"".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [bytes(hex(len(chunk)), 'ASCII')[2:], CRLF, chunk, CRLF] + self.conn.wfile.write(EMPTY.join(buf)) + else: + self.conn.wfile.write(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif b"content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. 
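+ # (205 Reset Content is treated the same way below, since it
+ # likewise forbids a message-body.)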
+ if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != b'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append((b"Transfer-Encoding", b"chunked")) + else: + # Closing the conn is the only way to determine len. + self.close_connection = True + + if b"connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append((b"Connection", b"close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append((b"Connection", b"Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if b"date" not in hkeys: + self.outheaders.append( + (b"Date", email.utils.formatdate(usegmt=True).encode('ISO-8859-1'))) + + if b"server" not in hkeys: + self.outheaders.append( + (b"Server", self.server.server_name.encode('ISO-8859-1'))) + + buf = [self.server.protocol.encode('ascii') + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.write(EMPTY.join(buf)) + + +class NoSSLError(Exception): + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_BufferedWriter(io.BufferedWriter): + """Faux file object attached to a socket object.""" + + def write(self, b): + self._checkClosed() + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + + with self._write_lock: + self._write_buf.extend(b) + self._flush_unlocked() + return len(b) + + def _flush_unlocked(self): + self._checkClosed("flush of closed file") + while self._write_buf: + try: + # ssl sockets only except 'bytes', not bytearrays + # so perhaps we should conditionally wrap this for perf? + n = self.raw.write(bytes(self._write_buf)) + except io.BlockingIOError as e: + n = e.characters_written + del self._write_buf[:n] + + +def CP_makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + if 'r' in mode: + return io.BufferedReader(socket.SocketIO(sock, mode), bufsize) + else: + return CP_BufferedWriter(socket.SocketIO(sock, mode), bufsize) + +class HTTPConnection(object): + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. 
+ """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_makefile): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. + req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if errnum == 'timed out' or errnum == 'The read operation timed out': + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See http://www.cherrypy.org/ticket/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_makefile(self.socket._sock, "wb", self.wbufsize) + req.simple_response("400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel socket + # when you call socket.close(). We do so manually here because we + # want this server to send a FIN TCP segment immediately. Note this + # must be called *before* calling socket.close(), because the latter + # drops its reference to the kernel socket. + # Python 3 *probably* fixed this with socket._real_close; hard to tell. +## self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. 
Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + """An object which equals and does math like the integer '0' but evals True.""" + def __add__(self, other): + return other + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + +class WorkerThread(threading.Thread): + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). + """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ((self.start_time is None) and trueyzero or self.conn.requests_seen), + 'Bytes Read': lambda s: self.bytes_read + ((self.start_time is None) and trueyzero or self.conn.rfile.bytes_read), + 'Bytes Written': lambda s: self.bytes_written + ((self.start_time is None) and trueyzero or self.conn.wfile.bytes_written), + 'Work Time': lambda s: self.work_time + ((self.start_time is None) and trueyzero or time.time() - self.start_time), + 'Read Throughput': lambda s: s['Bytes Read'](s) / (s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / (s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue() + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. 
Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + for i in range(amount): + if self.max > 0 and len(self._threads) >= self.max: + break + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + self._threads.append(worker) + worker.start() + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. + # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + if amount > 0: + for i in range(min(amount, len(self._threads) - self.min)): + # Put a number of shutdown requests on the queue equal + # to 'amount'. Once each of those is processed by a worker, + # that worker will terminate and be culled from our list + # in self.put. + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See http://www.cherrypy.org/ticket/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not windll.kernel32.SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + """Base class for SSL driver library adapters. 
+
+    Required methods:
+
+    * ``wrap(sock) -> (wrapped socket, ssl environ dict)``
+    * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> socket file object``
+    """
+
+    def __init__(self, certificate, private_key, certificate_chain=None):
+        self.certificate = certificate
+        self.private_key = private_key
+        self.certificate_chain = certificate_chain
+
+    def wrap(self, sock):
+        raise NotImplementedError
+
+    def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE):
+        raise NotImplementedError
+
+
+class HTTPServer(object):
+    """An HTTP server."""
+
+    _bind_addr = "127.0.0.1"
+    _interrupt = None
+
+    gateway = None
+    """A Gateway instance."""
+
+    minthreads = None
+    """The minimum number of worker threads to create (default 10)."""
+
+    maxthreads = None
+    """The maximum number of worker threads to create (default -1 = no limit)."""
+
+    server_name = None
+    """The name of the server; defaults to socket.gethostname()."""
+
+    protocol = "HTTP/1.1"
+    """The version string to write in the Status-Line of all HTTP responses.
+
+    For example, "HTTP/1.1" is the default. This also limits the supported
+    features used in the response."""
+
+    request_queue_size = 5
+    """The 'backlog' arg to socket.listen(); max queued connections (default 5)."""
+
+    shutdown_timeout = 5
+    """The total time, in seconds, to wait for worker threads to cleanly exit."""
+
+    timeout = 10
+    """The timeout in seconds for accepted connections (default 10)."""
+
+    version = "CherryPy/3.2.2"
+    """A version string for the HTTPServer."""
+
+    software = None
+    """The value to set for the SERVER_SOFTWARE entry in the WSGI environ.
+
+    If None, this defaults to ``'%s Server' % self.version``."""
+
+    ready = False
+    """An internal flag which marks whether the socket is accepting connections."""
+
+    max_request_header_size = 0
+    """The maximum size, in bytes, for request headers, or 0 for no limit."""
+
+    max_request_body_size = 0
+    """The maximum size, in bytes, for request bodies, or 0 for no limit."""
+
+    nodelay = True
+    """If True (the default since 3.1), sets the TCP_NODELAY socket option."""
+
+    ConnectionClass = HTTPConnection
+    """The class to use for handling HTTP connections."""
+
+    ssl_adapter = None
+    """An instance of SSLAdapter (or a subclass).
+ + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum([w['Requests'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Read'](w) for w + in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum([w['Bytes Written'](w) for w + in s['Worker Threads'].values()], 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum([w['Work Time'](w) for w + in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property(_get_bind_addr, _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. 
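+
+        For example, ('0.0.0.0', 8080) (an illustrative value) listens on
+        port 8080 of every IPv4 interface of the machine.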
+
+        For UNIX sockets, supply the filename as a string.""")
+
+    def start(self):
+        """Run the server forever."""
+        # We don't have to trap KeyboardInterrupt or SystemExit here,
+        # because cherrypy.server already does so, calling self.stop() for us.
+        # If you're using this server with another framework, you should
+        # trap those exceptions in whatever code block calls start().
+        self._interrupt = None
+
+        if self.software is None:
+            self.software = "%s Server" % self.version
+
+        # Select the appropriate socket
+        if isinstance(self.bind_addr, basestring):
+            # AF_UNIX socket
+
+            # So we can reuse the socket...
+            try: os.unlink(self.bind_addr)
+            except: pass
+
+            # So everyone can access the socket...
+            try: os.chmod(self.bind_addr, 511) # 0777
+            except: pass
+
+            info = [(socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)]
+        else:
+            # AF_INET or AF_INET6 socket
+            # Get the correct address family for our host (allows IPv6 addresses)
+            host, port = self.bind_addr
+            try:
+                info = socket.getaddrinfo(host, port, socket.AF_UNSPEC,
+                                          socket.SOCK_STREAM, 0, socket.AI_PASSIVE)
+            except socket.gaierror:
+                if ':' in self.bind_addr[0]:
+                    info = [(socket.AF_INET6, socket.SOCK_STREAM,
+                             0, "", self.bind_addr + (0, 0))]
+                else:
+                    info = [(socket.AF_INET, socket.SOCK_STREAM,
+                             0, "", self.bind_addr)]
+
+        self.socket = None
+        msg = "No socket could be created"
+        for res in info:
+            af, socktype, proto, canonname, sa = res
+            try:
+                self.bind(af, socktype, proto)
+            except socket.error:
+                if self.socket:
+                    self.socket.close()
+                self.socket = None
+                continue
+            break
+        if not self.socket:
+            raise socket.error(msg)
+
+        # Timeout so KeyboardInterrupt can be caught on Win32
+        self.socket.settimeout(1)
+        self.socket.listen(self.request_queue_size)
+
+        # Create worker threads
+        self.requests.start()
+
+        self.ready = True
+        self._start_time = time.time()
+        while self.ready:
+            try:
+                self.tick()
+            except (KeyboardInterrupt, SystemExit):
+                raise
+            except:
+                self.error_log("Error in HTTPServer.tick", level=logging.ERROR,
+                               traceback=True)
+            if self.interrupt:
+                while self.interrupt is True:
+                    # Wait for self.stop() to complete. See _set_interrupt.
+                    time.sleep(0.1)
+                if self.interrupt:
+                    raise self.interrupt
+
+    def error_log(self, msg="", level=20, traceback=False):
+        # Override this in subclasses as desired
+        sys.stderr.write(msg + '\n')
+        sys.stderr.flush()
+        if traceback:
+            tblines = format_exc()
+            sys.stderr.write(tblines)
+            sys.stderr.flush()
+
+    def bind(self, family, type, proto=0):
+        """Create (or recreate) the actual socket object."""
+        self.socket = socket.socket(family, type, proto)
+        prevent_socket_inheritance(self.socket)
+        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        if self.nodelay and not isinstance(self.bind_addr, str):
+            self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+
+        if self.ssl_adapter is not None:
+            self.socket = self.ssl_adapter.bind(self.socket)
+
+        # If listening on the IPV6 any address ('::' = IN6ADDR_ANY),
+        # activate dual-stack. See http://www.cherrypy.org/ticket/871.
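+        # (Setting IPV6_V6ONLY to 0 below lets the one IPv6 socket also
+        # accept IPv4 clients, which appear as IPv4-mapped addresses such
+        # as ::ffff:192.0.2.1; where the option is unavailable or rejected,
+        # the socket simply stays IPv6-only.)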
+ if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_makefile + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.write("".join(buf).encode('ISO-8859-1')) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + self.requests.put(conn) + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See http://www.cherrypy.org/ticket/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See http://www.cherrypy.org/ticket/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See http://www.cherrypy.org/ticket/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. 
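+                # (The else-branch below opens one throwaway TCP
+                # connection to our own listening address; accept() in
+                # tick() then returns, sees self.ready is False, and the
+                # serving loop in start() exits.)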
+                try:
+                    host, port = sock.getsockname()[:2]
+                except socket.error:
+                    x = sys.exc_info()[1]
+                    if x.args[0] not in socket_errors_to_ignore:
+                        # Changed to use error code and not message
+                        # See http://www.cherrypy.org/ticket/860.
+                        raise
+                else:
+                    # Note that we're explicitly NOT using AI_PASSIVE,
+                    # here, because we want an actual IP to touch.
+                    # localhost won't work if we've bound to a public IP,
+                    # but it will if we bound to '0.0.0.0' (INADDR_ANY).
+                    for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC,
+                                                  socket.SOCK_STREAM):
+                        af, socktype, proto, canonname, sa = res
+                        s = None
+                        try:
+                            s = socket.socket(af, socktype, proto)
+                            # See http://groups.google.com/group/cherrypy-users/
+                            #        browse_frm/thread/bbfe5eb39c904fe0
+                            s.settimeout(1.0)
+                            s.connect((host, port))
+                            s.close()
+                        except socket.error:
+                            if s:
+                                s.close()
+            if hasattr(sock, "close"):
+                sock.close()
+            self.socket = None
+
+        self.requests.stop(self.shutdown_timeout)
+
+
+class Gateway(object):
+    """A base class to interface HTTPServer with other systems, such as WSGI."""
+
+    def __init__(self, req):
+        self.req = req
+
+    def respond(self):
+        """Process the current request. Must be overridden in a subclass."""
+        raise NotImplementedError
+
+
+# These may either be wsgiserver.SSLAdapter subclasses or the string names
+# of such classes (in which case they will be lazily loaded).
+ssl_adapters = {
+    'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter',
+    }
+
+def get_ssl_adapter_class(name='builtin'):
+    """Return an SSL adapter class for the given name."""
+    adapter = ssl_adapters[name.lower()]
+    if isinstance(adapter, basestring):
+        last_dot = adapter.rfind(".")
+        attr_name = adapter[last_dot + 1:]
+        mod_path = adapter[:last_dot]
+
+        try:
+            mod = sys.modules[mod_path]
+            if mod is None:
+                raise KeyError()
+        except KeyError:
+            # The last [''] is important.
+            mod = __import__(mod_path, globals(), locals(), [''])
+
+        # Let an AttributeError propagate outward.
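+        # (For example, the default name 'builtin' resolves the dotted
+        # string above to cherrypy.wsgiserver.ssl_builtin and returns its
+        # BuiltinSSLAdapter class.)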
+        try:
+            adapter = getattr(mod, attr_name)
+        except AttributeError:
+            raise AttributeError("'%s' object has no attribute '%s'"
+                                 % (mod_path, attr_name))
+
+    return adapter
+
+# -------------------------------- WSGI Stuff -------------------------------- #
+
+
+class CherryPyWSGIServer(HTTPServer):
+    """A subclass of HTTPServer which calls a WSGI application."""
+
+    wsgi_version = (1, 0)
+    """The version of WSGI to produce."""
+
+    def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None,
+                 max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5):
+        self.requests = ThreadPool(self, min=numthreads or 1, max=max)
+        self.wsgi_app = wsgi_app
+        self.gateway = wsgi_gateways[self.wsgi_version]
+
+        self.bind_addr = bind_addr
+        if not server_name:
+            server_name = socket.gethostname()
+        self.server_name = server_name
+        self.request_queue_size = request_queue_size
+
+        self.timeout = timeout
+        self.shutdown_timeout = shutdown_timeout
+        self.clear_stats()
+
+    def _get_numthreads(self):
+        return self.requests.min
+    def _set_numthreads(self, value):
+        self.requests.min = value
+    numthreads = property(_get_numthreads, _set_numthreads)
+
+
+class WSGIGateway(Gateway):
+    """A base class to interface HTTPServer with WSGI."""
+
+    def __init__(self, req):
+        self.req = req
+        self.started_response = False
+        self.env = self.get_environ()
+        self.remaining_bytes_out = None
+
+    def get_environ(self):
+        """Return a new environ dict targeting the given wsgi.version"""
+        raise NotImplementedError
+
+    def respond(self):
+        """Process the current request."""
+        response = self.req.server.wsgi_app(self.env, self.start_response)
+        try:
+            for chunk in response:
+                # "The start_response callable must not actually transmit
+                # the response headers. Instead, it must store them for the
+                # server or gateway to transmit only after the first
+                # iteration of the application return value that yields
+                # a NON-EMPTY string, or upon the application's first
+                # invocation of the write() callable." (PEP 333)
+                if chunk:
+                    if isinstance(chunk, unicodestr):
+                        chunk = chunk.encode('ISO-8859-1')
+                    self.write(chunk)
+        finally:
+            if hasattr(response, "close"):
+                response.close()
+
+    def start_response(self, status, headers, exc_info=None):
+        """WSGI callable to begin the HTTP response."""
+        # "The application may call start_response more than once,
+        # if and only if the exc_info argument is provided."
+        if self.started_response and not exc_info:
+            raise AssertionError("WSGI start_response called a second "
+                                 "time with no exc_info.")
+        self.started_response = True
+
+        # "if exc_info is provided, and the HTTP headers have already been
+        # sent, start_response must raise an error, and should raise the
+        # exc_info tuple."
+        if self.req.sent_headers:
+            try:
+                raise exc_info[0](exc_info[1]).with_traceback(exc_info[2])
+            finally:
+                exc_info = None
+
+        # According to PEP 3333, when using Python 3, the response status
+        # and headers must be bytes masquerading as unicode; that is, they
+        # must be of type "str" but are restricted to code points in the
+        # "latin-1" set.
+        if not isinstance(status, str):
+            raise TypeError("WSGI response status is not of type str.")
+        self.req.status = status.encode('ISO-8859-1')
+
+        for k, v in headers:
+            if not isinstance(k, str):
+                raise TypeError("WSGI response header key %r is not of type str." % k)
+            if not isinstance(v, str):
+                raise TypeError("WSGI response header value %r is not of type str."
% v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.append((k.encode('ISO-8859-1'), v.encode('ISO-8859-1'))) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). + """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response("500 Internal Server Error", + "The requested resource returned more bytes than the " + "declared Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path.decode('ISO-8859-1'), + 'QUERY_STRING': req.qs.decode('ISO-8859-1'), + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method.decode('ISO-8859-1'), + 'REQUEST_URI': req.uri, + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol.decode('ISO-8859-1'), + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme.decode('ISO-8859-1'), + 'wsgi.version': (1, 0), + } + + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.items(): + k = k.decode('ISO-8859-1').upper().replace("-", "_") + env["HTTP_" + k] = v.decode('ISO-8859-1') + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys and values + in both Python 2 and Python 3. 
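+
+    Relative to WSGIGateway_10, the environ built below sets
+    wsgi.version to ('u', 0) and re-decodes PATH_INFO and QUERY_STRING
+    using wsgi.url_encoding (utf-8, with an ISO-8859-1 fallback).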
+    """
+
+    def get_environ(self):
+        """Return a new environ dict targeting the given wsgi.version"""
+        req = self.req
+        env_10 = WSGIGateway_10.get_environ(self)
+        env = env_10.copy()
+        env['wsgi.version'] = ('u', 0)
+
+        # Request-URI
+        env.setdefault('wsgi.url_encoding', 'utf-8')
+        try:
+            # SCRIPT_NAME is the empty string, who cares what encoding it is?
+            env["PATH_INFO"] = req.path.decode(env['wsgi.url_encoding'])
+            env["QUERY_STRING"] = req.qs.decode(env['wsgi.url_encoding'])
+        except UnicodeDecodeError:
+            # Fall back to latin 1 so apps can transcode if needed.
+            env['wsgi.url_encoding'] = 'ISO-8859-1'
+            env["PATH_INFO"] = env_10["PATH_INFO"]
+            env["QUERY_STRING"] = env_10["QUERY_STRING"]
+
+        return env
+
+wsgi_gateways = {
+    (1, 0): WSGIGateway_10,
+    ('u', 0): WSGIGateway_u0,
+}
+
+class WSGIPathInfoDispatcher(object):
+    """A WSGI dispatcher for dispatch based on the PATH_INFO.
+
+    apps: a dict or list of (path_prefix, app) pairs.
+    """
+
+    def __init__(self, apps):
+        try:
+            apps = list(apps.items())
+        except AttributeError:
+            pass
+
+        # Sort the apps by len(path), descending
+        apps.sort()
+        apps.reverse()
+
+        # The path_prefix strings must start, but not end, with a slash.
+        # Use "" instead of "/".
+        self.apps = [(p.rstrip("/"), a) for p, a in apps]
+
+    def __call__(self, environ, start_response):
+        path = environ["PATH_INFO"] or "/"
+        for p, app in self.apps:
+            # The apps list should be sorted by length, descending.
+            if path.startswith(p + "/") or path == p:
+                environ = environ.copy()
+                environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p
+                environ["PATH_INFO"] = path[len(p):]
+                return app(environ, start_response)
+
+        start_response('404 Not Found', [('Content-Type', 'text/plain'),
+                                         ('Content-Length', '0')])
+        return ['']
+

From 2c3fb107485e7621bc9be8f393ee884288987ba2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 26 Dec 2011 12:04:28 +0530
Subject: [PATCH 13/74] Fix #908533 (Huawei X6/Wellcom a99 not detected)

---
 src/calibre/devices/android/driver.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 4f2029dc6a..a6ecb85128 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -93,6 +93,7 @@ class ANDROID(USBMS):
 
         # Viewsonic/Vizio
         0x0489 : {
+            0xc000 : [0x0226],
             0xc001 : [0x0226],
             0xc004 : [0x0226],
             0x8801 : [0x0226, 0x0227],

From f2b04264458e3831982e2910258d61738be30998 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 26 Dec 2011 13:31:12 +0530
Subject: [PATCH 14/74] Do not lose position in book list on device connection/sync, if a search is active.
Fixes #908553 (Main window scrolls automatically to top upon completion of task) --- src/calibre/gui2/library/views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index 3244a35545..cb73b5ddf0 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -51,8 +51,7 @@ class PreserveViewState(object): # {{{ traceback.print_exc() def __exit__(self, *args): - current = self.view.get_selected_ids() - if not current and self.selected_ids: + if self.selected_ids: if self.current_id is not None: self.view.current_id = self.current_id self.view.select_rows(self.selected_ids, using_ids=True, From 0360b314d9a28e3175923738f51ed244bb84a16c Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Mon, 26 Dec 2011 10:10:41 +0100 Subject: [PATCH 15/74] Improvements to search section of the manual -- indicate which words are translated. --- src/calibre/manual/gui.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 34dad57d93..f048d99d1b 100755 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -362,8 +362,9 @@ The syntax for searching for dates is:: If the date is ambiguous, the current locale is used for date comparison. For example, in an mm/dd/yyyy locale 2/1/2009 is interpreted as 1 Feb 2009. In a dd/mm/yyyy locale it is interpreted as 2 Jan 2009. Some special date strings are available. The string ``today`` translates to today's date, whatever it is. The -strings ``yesterday`` and ``thismonth`` also work. In addition, the string ``daysago`` can be used to compare -to a date some number of days ago. For example:: +strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work. +In addition, the string ``daysago`` (also translated) can be used to compare to a date some number of days ago. +For example:: date:>10daysago date:<=45daysago @@ -399,7 +400,7 @@ You can search for the absence or presence of a field using the special "true" a Yes/no custom columns are searchable. Searching for ``false``, ``empty``, or ``blank`` will find all books with undefined values in the column. Searching for ``true`` will find all books that do not have undefined values in the column. Searching for ``yes`` or ``checked`` will find all books with ``Yes`` in the column. -Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column. +Searching for ``no`` or ``unchecked`` will find all books with ``No`` in the column. Note that the words ``yes``, ``no``, ``blank``, ``empty``, ``checked`` and ``unchecked`` are translated; you must use the current language's equivalent word. The words ``true`` and ``false`` and the special values ``_yes`` and ``_no`` are not translated. Hierarchical items (e.g. A.B.C) use an extended syntax to match initial parts of the hierarchy. This is done by adding a period between the exact match indicator (=) and the text. For example, the query ``tags:=.A`` will find the tags `A` and `A.B`, but will not find the tags `AA` or `AA.B`. The query ``tags:=.A.B`` will find the tags `A.B` and `A.B.C`, but not the tag `A`. From c7f90b9fc7bbf9cf9b2e06cf02bac8bc8e94efad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 26 Dec 2011 23:47:05 +0530 Subject: [PATCH 16/74] Conversion pipeline: Disable HTML 5 parsing if it results in deeply nested trees. 
Fixes #908818 (RuntimeError: maximum recursion depth exceeded in cmp) --- src/calibre/ebooks/mobi/reader.py | 2 +- src/calibre/ebooks/oeb/parse_utils.py | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 8637cfb9ae..e58b492cef 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -502,7 +502,7 @@ class MobiReader(object): self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace(']*>', '', self.processed_html) - self.processed_html = re.sub(r'<(/?)o:p', r'<\1p', self.processed_html) + self.processed_html = re.sub(r'<\s*(/?)\s*o:p[^>]*>', r'', self.processed_html) # Swap inline and block level elements, and order block level elements according to priority # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec self.processed_html = re.sub(r'(?i)(?P(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P]*>)', '\g'+'\g', self.processed_html) diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index f8456914b9..e02a4d0e61 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -70,9 +70,27 @@ def clone_element(elem, nsmap={}, in_context=True): nelem.extend(elem) return nelem -def html5_parse(data): +def node_depth(node): + ans = 0 + p = node.getparent() + while p is not None: + ans += 1 + p = p.getparent() + return ans + +def html5_parse(data, max_nesting_depth=500): import html5lib data = html5lib.parse(data, treebuilder='lxml').getroot() + + # Check that the asinine HTML 5 algorithm did not result in a tree with + # insane nesting depths + for x in data.iterdescendants(): + if len(x) == 0: + # Leaf node + depth = node_depth(x) + if depth > max_nesting_depth: + raise ValueError('html5lib resulted in a tree with nesting' + ' depth > %d'%max_nesting_depth) # Set lang correctly xl = data.attrib.pop('xmlU0003Alang', None) if xl is not None and 'lang' not in data.attrib: From b715e71fceff9cd8b694414f3bd1c7dc48b9f2bc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 26 Dec 2011 23:55:05 +0530 Subject: [PATCH 17/74] ... 
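The nesting-depth guard being adjusted here can be exercised standalone. In the sketch below, node_depth() is copied from patch 16, while the check_depth() harness and the sample markup are illustrative only (the guard's default depth is tightened across these patches, so it is passed explicitly):

    from lxml import etree

    def node_depth(node):
        # Number of ancestors between this node and the tree root.
        ans = 0
        p = node.getparent()
        while p is not None:
            ans += 1
            p = p.getparent()
        return ans

    def check_depth(root, max_nesting_depth):
        # Only leaf elements need checking: an over-deep interior node
        # always has an over-deep leaf somewhere beneath it.
        # (isinstance checks str here; the original Python 2 code uses
        # basestring.)
        for x in root.iterdescendants():
            if isinstance(x.tag, str) and len(x) == 0:
                if node_depth(x) > max_nesting_depth:
                    raise ValueError('html5lib resulted in a tree with '
                                     'nesting depth > %d' % max_nesting_depth)

    root = etree.fromstring('<html><body>%s</body></html>'
                            % ('<div>' * 200 + '</div>' * 200))
    check_depth(root, 100)  # raises ValueError: the innermost div is at depth 201
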
--- src/calibre/ebooks/oeb/parse_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index e02a4d0e61..af0eb479f0 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -85,8 +85,7 @@ def html5_parse(data, max_nesting_depth=500): # Check that the asinine HTML 5 algorithm did not result in a tree with # insane nesting depths for x in data.iterdescendants(): - if len(x) == 0: - # Leaf node + if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node depth = node_depth(x) if depth > max_nesting_depth: raise ValueError('html5lib resulted in a tree with nesting' From 231e8aeca3dfdf45e3d5677edbf6d6386619c395 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 00:19:43 +0530 Subject: [PATCH 18/74] Fix regression that broke customizing toolbars on non English calibre installs --- src/calibre/gui2/preferences/toolbar.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/calibre/gui2/preferences/toolbar.py b/src/calibre/gui2/preferences/toolbar.py index a173b53ed5..bae7a83e0b 100644 --- a/src/calibre/gui2/preferences/toolbar.py +++ b/src/calibre/gui2/preferences/toolbar.py @@ -17,10 +17,10 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget class FakeAction(object): - def __init__(self, name, icon, tooltip=None, + def __init__(self, name, gui_name, icon, tooltip=None, dont_add_to=frozenset([]), dont_remove_from=frozenset([])): self.name = name - self.action_spec = (name, icon, tooltip, None) + self.action_spec = (gui_name, icon, tooltip, None) self.dont_remove_from = dont_remove_from self.dont_add_to = dont_add_to @@ -28,17 +28,18 @@ class BaseModel(QAbstractListModel): def name_to_action(self, name, gui): if name == 'Donate': - return FakeAction(_('Donate'), 'donate.png', + return FakeAction('Donate', _('Donate'), 'donate.png', dont_add_to=frozenset(['context-menu', 'context-menu-device'])) if name == 'Location Manager': - return FakeAction(_('Location Manager'), None, + return FakeAction('Location Manager', _('Location Manager'), 'reader.png', _('Switch between library and device views'), dont_add_to=frozenset(['menubar', 'toolbar', 'toolbar-child', 'context-menu', 'context-menu-device'])) if name is None: - return FakeAction('--- '+_('Separator')+' ---', None, + return FakeAction('--- '+('Separator')+' ---', + '--- '+_('Separator')+' ---', None, dont_add_to=frozenset(['menubar', 'menubar-device'])) try: return gui.iactions[name] @@ -314,7 +315,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): if not pref_in_toolbar and not pref_in_menubar: self.models['menubar'][1].add(['Preferences']) if not lm_in_toolbar and not lm_in_menubar: - self.models['menubar-device'][1].add(['Location Manager']) + m = self.models['toolbar-device'][1] + m.add(['Location Manager']) + m.move(m.index(m.rowCount(None)-1), 5-m.rowCount(None)) # Save data. for am, cm in self.models.values(): From 6f955c596fc5c9063e4c7da5c5f61932dcfa4066 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 00:47:45 +0530 Subject: [PATCH 19/74] ... 
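For orientation: each of the two new display options removes one predicate from the WHERE clause of the Touch (dbversion >= 33) query in the hunks below. A rough sketch of the assembly, with the default option values from EXTRA_CUSTOMIZATION_DEFAULT (the SELECT list is elided here):

    show_expired = True           # OPT_SHOW_EXPIRED_BOOK_RECORDS
    show_previews = False         # OPT_SHOW_PREVIEWS
    show_recommendations = False  # OPT_SHOW_RECOMMENDATIONS

    where = 'BookID is Null'
    if not show_previews:
        # Accessibility 6 marks Kobo preview books in the driver.
        where += ' and Accessibility <> 6'
    if not show_recommendations:
        # Per the option text, recommendations that are mere store
        # pointers have no downloaded file.
        where += " and IsDownloaded in ('true', 1)"
    where += (' and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null)'
              + (' and ContentType = 6)' if show_expired else ')'))
    print(where)
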
--- src/calibre/ebooks/oeb/parse_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index af0eb479f0..5b70574e13 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -78,7 +78,7 @@ def node_depth(node): p = p.getparent() return ans -def html5_parse(data, max_nesting_depth=500): +def html5_parse(data, max_nesting_depth=300): import html5lib data = html5lib.parse(data, treebuilder='lxml').getroot() From 0b58a434962372ac9ca245d00de43ac7634e26fe Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Mon, 26 Dec 2011 20:11:05 -0400 Subject: [PATCH 20/74] Kobo hide the Previews and Recommendations on Touch with options to show --- src/calibre/devices/kobo/driver.py | 40 ++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 6e58e27cc7..8e43a68a3e 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -25,7 +25,7 @@ class KOBO(USBMS): gui_name = 'Kobo Reader' description = _('Communicate with the Kobo Reader') author = 'Timothy Legge' - version = (1, 0, 11) + version = (1, 0, 12) dbversion = 0 fwversion = 0 @@ -67,19 +67,31 @@ class KOBO(USBMS): ' in the datbase. With this option Calibre will show the ' 'expired records and allow you to delete them with ' 'the new delete logic.'), + _('Show Previews') + + ':::'+_('Kobo previews are included on the Touch and some other versions' + ' by default they are no longer displayed as there is no good reason to ' + 'see them. Enable if you wish to see/delete them.'), + _('Show Recommendations') + + ':::'+_('Kobo no shows recomendations on the device. In some case these have ' + 'files but in other cases they are just pointers to the web site to buy. 
' + 'Enable if you wish to see/delete them.'), ] EXTRA_CUSTOMIZATION_DEFAULT = [ ', '.join(['tags']), True, True, - True + True, + False, + False ] OPT_COLLECTIONS = 0 OPT_UPLOAD_COVERS = 1 OPT_UPLOAD_GRAYSCALE_COVERS = 2 OPT_SHOW_EXPIRED_BOOK_RECORDS = 3 + OPT_SHOW_PREVIEWS = 4 + OPT_SHOW_RECOMMENDATIONS = 5 def initialize(self): USBMS.initialize(self) @@ -161,6 +173,8 @@ class KOBO(USBMS): # Label Previews if accessibility == 6: playlist_map[lpath].append('Preview') + elif accessibility == 4: + playlist_map[lpath].append('Recommendation') path = self.normalize_path(path) # print "Normalized FileName: " + path @@ -241,31 +255,40 @@ class KOBO(USBMS): debug_print("Database Version: ", self.dbversion) opts = self.settings() - if self.dbversion >= 16: + if self.dbversion >= 33: query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ - 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \ + 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, IsDownloaded from content where ' \ + 'BookID is Null %(previews)s %(recomendations)s and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \ + if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')', \ + previews=' and Accessibility <> 6' \ + if opts.extra_customization[self.OPT_SHOW_PREVIEWS] == False else '', \ + recomendations=' and IsDownloaded in (\'true\', 1)' \ + if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] == False else '') + elif self.dbversion >= 16 and self.dbversion < 33: + query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ + 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, "1" as IsDownloaded from content where ' \ 'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \ if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')') elif self.dbversion < 16 and self.dbversion >= 14: query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ - 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \ + 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility, "1" as IsDownloaded from content where ' \ 'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \ if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')') elif self.dbversion < 14 and self.dbversion >= 8: query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ - 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \ + 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, "1" as IsDownloaded from content where ' \ 'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \ if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')') else: query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ - 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null' + 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, "1" 
as IsDownloaded from content where BookID is Null' try: cursor.execute (query) except Exception as e: err = str(e) if not ('___ExpirationStatus' in err or 'FavouritesIndex' in err or - 'Accessibility' in err): + 'Accessibility' in err or 'IsDownloaded' in err): raise query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as ' @@ -701,6 +724,7 @@ class KOBO(USBMS): accessibilitylist = { "Preview":6, + "Recommendation":4, } # debug_print('Starting update_device_database_collections', collections_attributes) From 252cd2507ceefb16183b932cf20ae29f5a535372 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 08:58:52 +0530 Subject: [PATCH 21/74] Fix #908912 (Updated recipe for La Razon from Bolivia) --- recipes/la_razon_bo.recipe | 52 ++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/recipes/la_razon_bo.recipe b/recipes/la_razon_bo.recipe index 6af899b760..51ec024350 100644 --- a/recipes/la_razon_bo.recipe +++ b/recipes/la_razon_bo.recipe @@ -1,10 +1,9 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' www.la-razon.com ''' -from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class LaRazon_Bol(BasicNewsRecipe): @@ -16,19 +15,16 @@ class LaRazon_Bol(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 200 no_stylesheets = True - encoding = 'cp1252' + encoding = 'utf8' use_embedded_content = False language = 'es_BO' publication_type = 'newspaper' - delay = 1 remove_empty_feeds = True - cover_url = strftime('http://www.la-razon.com/portadas/%Y%m%d_LaRazon.jpg') - masthead_url = 'http://www.la-razon.com/imagenes/logo.jpg' - extra_css = """ body{font-family: Arial,Helvetica,sans-serif } - img{margin-bottom: 0.4em} - .noticia-titulo{font-family: Georgia,"Times New Roman",Times,serif} - .lead{font-weight: bold; font-size: 0.8em} - """ + masthead_url = 'http://www.la-razon.com/static/LRZRazon/images/lrz-logo.png' + extra_css = """ body{font-family: Georgia,"Times New Roman",Times,serif} + img{margin-bottom: 0.4em; display: block} + .meta{font-size: small; font-family: Arial,Helvetica,sans-serif} + """ conversion_options = { 'comment' : description @@ -37,25 +33,27 @@ class LaRazon_Bol(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(name='div', attrs={'class':['noticia-titulo','noticia-desarrollo']})] - remove_tags = [dict(name=['meta','link','form','iframe','embed','object'])] + keep_only_tags = [dict(name='div', attrs={'class':['pg-hd', 'pg-bd']})] + remove_tags = [ + dict(name=['meta','link','form','iframe','embed','object']) + ,dict(name='div', attrs={'class':'bd'}) + ] remove_attributes = ['width','height'] feeds = [ - (u'Editorial' , u'http://www.la-razon.com/rss_editorial.php' ) - ,(u'Opinión' , u'http://www.la-razon.com/rss_opinion.php' ) - ,(u'Nacional' , u'http://www.la-razon.com/rss_nacional.php' ) - ,(u'Economia' , u'http://www.la-razon.com/rss_economia.php' ) - ,(u'Ciudades' , u'http://www.la-razon.com/rss_ciudades.php' ) - ,(u'Sociedad' , u'http://www.la-razon.com/rss_sociedad.php' ) - ,(u'Mundo' , u'http://www.la-razon.com/rss_sociedad.php' ) - ,(u'La Revista' , u'http://www.la-razon.com/rss_larevista.php' ) - ,(u'Sociales' , u'http://www.la-razon.com/rss_sociales.php' ) - ,(u'Mia' , u'http://www.la-razon.com/rss_mia.php' ) - ,(u'Marcas' , u'http://www.la-razon.com/rss_marcas.php' ) - ,(u'Escape' , 
u'http://www.la-razon.com/rss_escape.php' ) - ,(u'El Financiero' , u'http://www.la-razon.com/rss_financiero.php') - ,(u'Tendencias' , u'http://www.la-razon.com/rss_tendencias.php') + (u'Editorial' , u'http://www.la-razon.com/rss/opinion/editorial/' ) + ,(u'Nacional' , u'http://www.la-razon.com/rss/nacional/' ) + ,(u'Economia' , u'http://www.la-razon.com/rss/economia/' ) + ,(u'Ciudades' , u'http://www.la-razon.com/rss/ciudades/' ) + ,(u'Sociedad' , u'http://www.la-razon.com/rss/sociedad/' ) + ,(u'Mundo' , u'http://www.la-razon.com/rss/mundo/' ) + ,(u'La Revista' , u'http://www.la-razon.com/rss/la_revista/' ) + ,(u'Sociales' , u'http://www.la-razon.com/rss/sociales/' ) + ,(u'Mia' , u'http://www.la-razon.com/rss/suplementos/mia/' ) + ,(u'Marcas' , u'http://www.la-razon.com/rss/marcas/' ) + ,(u'Escape' , u'http://www.la-razon.com/rss/suplementos/escape/' ) + ,(u'El Financiero' , u'http://www.la-razon.com/rss/suplementos/financiero/') + ,(u'Tendencias' , u'http://www.la-razon.com/rss/suplementos/tendencias/') ] def preprocess_html(self, soup): From 19d5daae8ebd9a04704b76fcb562f56b921dfb6e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 09:06:28 +0530 Subject: [PATCH 22/74] ... --- src/calibre/devices/kobo/driver.py | 6 +++--- src/calibre/ebooks/oeb/parse_utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 8e43a68a3e..4e27dac96d 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -67,12 +67,12 @@ class KOBO(USBMS): ' in the datbase. With this option Calibre will show the ' 'expired records and allow you to delete them with ' 'the new delete logic.'), - _('Show Previews') + + _('Show Previews') + ':::'+_('Kobo previews are included on the Touch and some other versions' ' by default they are no longer displayed as there is no good reason to ' 'see them. Enable if you wish to see/delete them.'), - _('Show Recommendations') + - ':::'+_('Kobo no shows recomendations on the device. In some case these have ' + _('Show Recommendations') + + ':::'+_('Kobo now shows recommendations on the device. In some case these have ' 'files but in other cases they are just pointers to the web site to buy. ' 'Enable if you wish to see/delete them.'), ] diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index 5b70574e13..d262a6782c 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -78,7 +78,7 @@ def node_depth(node): p = p.getparent() return ans -def html5_parse(data, max_nesting_depth=300): +def html5_parse(data, max_nesting_depth=100): import html5lib data = html5lib.parse(data, treebuilder='lxml').getroot() From 595fe0f71961a29c7ee6ebabe184e4aadab2c955 Mon Sep 17 00:00:00 2001 From: Translators <> Date: Tue, 27 Dec 2011 04:56:30 +0000 Subject: [PATCH 23/74] Launchpad automatic translations update. 
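One detail worth noting in the uk.po changes below: Ukrainian uses the three-form Plural-Forms rule carried in the file header. As a quick sanity check, that gettext C expression can be transliterated into Python (an illustrative sketch, not code from the patch):

    def uk_plural(n):
        # Python rendering of uk.po's header rule:
        # nplurals=3; plural=n%10==1 && n%100!=11 ? 0
        #     : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;
        if n % 10 == 1 and n % 100 != 11:
            return 0
        if 2 <= n % 10 <= 4 and (n % 100 < 10 or n % 100 >= 20):
            return 1
        return 2

    assert [uk_plural(n) for n in (1, 2, 5, 11, 21)] == [0, 1, 2, 2, 0]
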
--- src/calibre/translations/de.po | 8 +- src/calibre/translations/fr.po | 2 +- src/calibre/translations/it.po | 15 +-- src/calibre/translations/uk.po | 184 ++++++++++++++++++++++++++++++++- 4 files changed, 196 insertions(+), 13 deletions(-) diff --git a/src/calibre/translations/de.po b/src/calibre/translations/de.po index 3ef438ca4a..33ff6f77e1 100644 --- a/src/calibre/translations/de.po +++ b/src/calibre/translations/de.po @@ -8,14 +8,14 @@ msgstr "" "Project-Id-Version: de\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-12-23 07:12+0000\n" -"PO-Revision-Date: 2011-12-21 16:08+0000\n" -"Last-Translator: Mirko Russo \n" +"PO-Revision-Date: 2011-12-26 10:52+0000\n" +"Last-Translator: Daniel Winzen \n" "Language-Team: American English \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" -"X-Launchpad-Export-Date: 2011-12-24 05:22+0000\n" +"X-Launchpad-Export-Date: 2011-12-27 04:55+0000\n" "X-Generator: Launchpad (build 14560)\n" "X-Poedit-Bookmarks: 3327,-1,-1,-1,-1,-1,-1,-1,-1,-1\n" "Generated-By: pygettext.py 1.5\n" @@ -9710,7 +9710,7 @@ msgstr "Autorensortierung" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/edit_authors_dialog.py:55 msgid "Link" -msgstr "Link" +msgstr "Verknüpfung" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/edit_authors_dialog.py:122 #: /home/kovid/work/calibre/src/calibre/gui2/lrf_renderer/main.py:160 diff --git a/src/calibre/translations/fr.po b/src/calibre/translations/fr.po index 7dfa3b88c0..87d4ec4a85 100644 --- a/src/calibre/translations/fr.po +++ b/src/calibre/translations/fr.po @@ -15,7 +15,7 @@ msgstr "" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n > 1;\n" -"X-Launchpad-Export-Date: 2011-12-26 04:51+0000\n" +"X-Launchpad-Export-Date: 2011-12-27 04:55+0000\n" "X-Generator: Launchpad (build 14560)\n" "X-Poedit-Bookmarks: 1177,1104,-1,-1,-1,-1,-1,-1,-1,-1\n" "Generated-By: pygettext.py 1.5\n" diff --git a/src/calibre/translations/it.po b/src/calibre/translations/it.po index 74249fe750..724a2a4049 100644 --- a/src/calibre/translations/it.po +++ b/src/calibre/translations/it.po @@ -10,14 +10,14 @@ msgstr "" "Project-Id-Version: calibre_calibre-it\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2011-12-23 07:12+0000\n" -"PO-Revision-Date: 2011-12-21 18:34+0000\n" +"PO-Revision-Date: 2011-12-26 09:41+0000\n" "Last-Translator: Vincenzo Reale \n" "Language-Team: Italian \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" -"X-Launchpad-Export-Date: 2011-12-24 05:25+0000\n" +"X-Launchpad-Export-Date: 2011-12-27 04:56+0000\n" "X-Generator: Launchpad (build 14560)\n" "X-Poedit-Bookmarks: -1,-1,-1,-1,-1,1105,-1,1312,-1,-1\n" "Generated-By: pygettext.py 1.5\n" @@ -4954,7 +4954,7 @@ msgstr "categorie del navigatore dei tag da non visualizzare" #: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:280 msgid "Show this confirmation again" -msgstr "" +msgstr "Mostra ancora questa conferma" #: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:529 msgid "Choose Files" @@ -8996,21 +8996,24 @@ msgstr "Reimposta autore a Sconosciuto" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:72 msgid "Some invalid ISBNs" -msgstr "" +msgstr "Alcuni ISBN non validi" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:73 msgid "" "Some of the ISBNs you entered 
were invalid. They will be ignored. Click Show " "Details to see which ones. Do you want to proceed?" msgstr "" +"Alcuni ISBN inseriti non sono validi. Saranno ignorati. Fai clic su Mostra " +"dettagli per sapere quali. Vuoi continuare?" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:79 msgid "All invalid ISBNs" -msgstr "" +msgstr "Tutti gli ISBBN non validi" #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:80 msgid "All the ISBNs you entered were invalid. No books can be added." msgstr "" +"Tutti gli ISBN inseriti non sono validi. Non può essere aggiunto alcun libro." #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn_ui.py:63 msgid "Add books by ISBN" @@ -14048,7 +14051,7 @@ msgstr "Il titolo %s è già in uso" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/create_custom_column.py:271 msgid "You must enter a template for composite columns" -msgstr "" +msgstr "Devi inserire un modello per le colonne composte" #: /home/kovid/work/calibre/src/calibre/gui2/preferences/create_custom_column.py:281 msgid "You must enter at least one value for enumeration columns" diff --git a/src/calibre/translations/uk.po b/src/calibre/translations/uk.po index 1e42ac42a3..666d7e25f5 100644 --- a/src/calibre/translations/uk.po +++ b/src/calibre/translations/uk.po @@ -9,7 +9,7 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME \n" "POT-Creation-Date: 2011-12-23 07:12+0000\n" -"PO-Revision-Date: 2011-12-25 21:04+0000\n" +"PO-Revision-Date: 2011-12-26 20:12+0000\n" "Last-Translator: yurchor \n" "Language-Team: Ukrainian \n" "MIME-Version: 1.0\n" @@ -17,7 +17,7 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && " "n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;\n" -"X-Launchpad-Export-Date: 2011-12-26 04:52+0000\n" +"X-Launchpad-Export-Date: 2011-12-27 04:56+0000\n" "X-Generator: Launchpad (build 14560)\n" "Language: uk\n" @@ -21601,6 +21601,39 @@ msgid "" "timestamp default if not set: dd MMM yyyy\n" "last_modified_display_format if not set: dd MMM yyyy" msgstr "" +"Формат, який буде використано для дати публікації та часової позначки " +"(дати).\n" +"Рядок, який керує способом показу дати публікації у графічному інтерфейсі\n" +"d номер дня без початкового нуля (від 1 до 31)\n" +"dd номер дня з початковим нулем (від 01 до 31)\n" +"ddd скорочений локалізований запис дня (від «пн» до «нд»).\n" +"dddd локалізована назва дня повністю (від «понеділок» до «неділя»).\n" +"M номер місяця без початкового нуля (1-12)\n" +"MM номер місяця з початковим нулем (01-12)\n" +"MMM скорочена локалізована назва місяця (від «січ» до «гру»).\n" +"MMMM локалізована назва місяця повністю (від «січень» до «грудень»).\n" +"yy двоцифровий запис року (00-99)\n" +"yyyy чотирицифровий запис року\n" +"h запис годин без початкового 0 (від 0 до 11 або від 0 до 23, залежно " +"від значення am/pm) hh запис годин з початковим 0 (від 00 до 11 або від " +"00 до 23, залежно від значення am/pm) m запис хвилин без початкового 0 " +"(від 0 до 59)\n" +"mm запис хвилин з початковим 0 (від 00 до 59)\n" +"s запис секунд без початкового 0 (від 0 до 59)\n" +"ss запис секунд з початковим 0 (від 00 до 59)\n" +"ap використовувати 12-годинний запис замість 24-годинного, де «ap»\n" +"буде замінено локалізованим рядком для am або pm.\n" +"AP використовувати 12-годинний запис замість 24-годинного, де «ap»\n" +"буде замінено локалізованим рядком для AM або PM.\n" +"iso дата з часом і часовим поясом. 
Вказувати можна лише одне значення " +"формату.\n" +"Наприклад, якщо датою є 9 січня 2010 року, буде визначено таку " +"відповідність\n" +"MMM yyyy ==> січ 2010 yyyy ==> 2010 dd MMM yyyy ==> 09 січ 2010\n" +"MM/yyyy ==> 01/2010 d/M/yy ==> 9/1/10 yy ==> 10\n" +"Типовий формат дати публікації: MMM yyyy\n" +"Типовий формат позначки часу: dd MMM yyyy\n" +"Формат last_modified_display_format: dd MMM yyyy" #: /home/kovid/work/calibre/resources/default_tweaks.py:174 msgid "Control sorting of titles and series in the library display" @@ -21626,6 +21659,19 @@ msgid "" "return\n" "without changing anything is sufficient to change the sort." msgstr "" +"Керує впорядкуванням назв і серій на панелі бібліотеки. Якщо встановлено\n" +"«library_order», замість назви буде використано поле впорядкованої назви.\n" +"Якщо вами не виконувалося редагування поля впорядкованої назви вручну,\n" +"початкові артиклі «The» і «A» буде проігноровано. Якщо встановлено у\n" +"значення «strictly_alphabetic», назви буде впорядковано як є\n" +"(впорядковано за назвами, а не за впорядкованими назвами). Наприклад, з\n" +"library_order «The Client» буде показано у списку літери «C».\n" +"З strictly_alphabetic цю книгу буде показано у списку літери «T».\n" +"Цей прапорець впливатиме на показ бібліотеки Calibre. Змін впорядкування\n" +"на пристроях виконано не буде. Крім того, порядок назви книг, доданих до\n" +"зміни значення прапорця, не буде змінено до внесення змін до назви.\n" +"Для внесення змін до впорядкування достатньо подвійного клацання\n" +"на назві з наступним натисканням клавіші Enter." #: /home/kovid/work/calibre/resources/default_tweaks.py:188 msgid "Control formatting of title and series when used in templates" @@ -21647,6 +21693,18 @@ msgid "" "will become \"Lord of the Rings, The\". If the tweak is set to\n" "strictly_alphabetic, it would remain \"The Lord of the Rings\"." msgstr "" +"Керує способом форматування назв книг і серій під час збереження на диск " +"або\n" +"надсилання на пристрій. Поведінка залежить від поля, яке обробляється. Якщо\n" +"обробляється поле назви і встановлено значення «library_order», заголовок\n" +"буде замінено на впорядковану назву. Якщо встановлено значення\n" +"«strictly_alphabetic», назву не буде змінено. Під час обробки серій, якщо\n" +"встановлено значення «library_order», артиклі «The» і «An» буде пересунуто\n" +"в кінець назви. Якщо встановлено значення «strictly_alphabetic», серії\n" +"буде надіслано без змін. Наприклад, якщо для коригування встановлено\n" +"значення «library_order», «The Lord of the Rings» буде замінено на\n" +"«Lord of the Rings, The». Якщо встановлено значення коригування,\n" +"«strictly_alphabetic», залишиться назва «The Lord of the Rings»." #: /home/kovid/work/calibre/resources/default_tweaks.py:201 msgid "Set the list of words considered to be \"articles\" for sort strings" @@ -21672,6 +21730,19 @@ msgid "" "interface language is used. The setting title_sort_articles is ignored\n" "(present only for legacy reasons)." msgstr "" +"Встановити список слів, які вважатимуться артиклями під час визначення\n" +"рядків впорядкованих назв. У різних мовах використовують різні артиклі.\n" +"Типово, у calibre використовується комбінація артиклів з англійської та\n" +"встановленої мови інтерфейсу calibre. Крім того, у певних контекстах,\n" +"коли можна визначити мову книги, використовуватиметься визначена мова.\n" +"Ви можете змінити список артиклів для певної мови або додати нову мову\n" +"редагуванням параметра per_language_title_sort_articles. 
Щоб наказати\n" +"calibre використовувати мову, відмінну від мови інтерфейсу користувача,\n" +"встановіть відповідне значення параметра default_language_for_title_sort.\n" +"Наприклад, щоб скористатися німецькою, встановіть значення «deu».\n" +"Значення «None» призведе до використання мови інтерфейсу користувача\n" +"Значення title_sort_articles буде проігноровано (цей параметр залишено\n" +"лише з міркувань сумісності із застарілими версіями програми)." #: /home/kovid/work/calibre/resources/default_tweaks.py:253 msgid "Specify a folder calibre should connect to at startup" @@ -21774,6 +21845,64 @@ msgid "" "sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}\n" "sony_collection_name_template='{category:||: }{value}'" msgstr "" +"Визначає правила перейменування для збірок sony. Це коригування можна\n" +"застосовувати, лише якщо виконується автоматична обробка метаданих.\n" +"Назви збірок на пристроях Sony залежать від того, типовим чи нетиповим\n" +"є поле. Збірки, створені на основі типового поля, мають назви, рівні\n" +"значенню цього поля. Наприклад, якщо типовий стовпчик «series» містить\n" +"значення «Darkover», назвою збірки буде «Darkover». Збірки, створені на\n" +"основі нетипового поля, мають назви, мають назви, у яких до значення\n" +"поля додається його назва. Наприклад, якщо у нетиповому стовпчику з\n" +"назвою «My Series» міститься назва «Darkover», типовою назвою збірки буде\n" +"«Darkover (My Series)». У нашому прикладі «Darkover» називається\n" +"значенням, а «My Series» — категорією. Якщо у двох книг будуть поля,\n" +"на основі яких за правилами має бути створено збірки з однаковими назвами,\n" +"обидві книги буде розміщено у одній спільній збірці.\n" +"За допомогою цього набору коригувань ви можете вказати для типового або\n" +"нетипового поля спосіб іменування збірок. Ви можете скористатися ними для\n" +"додавання опису до типового поля, наприклад «Щось (Мітка)» замість просто\n" +"«Щось». Ви також можете скористатися ними для примусового збирання " +"декількох\n" +"полів у одній збірці. Наприклад, ви можете наказати програмі зібрати " +"значення\n" +"у полях «series», «#my_series_1» і «#my_series_2» у збірках з назвами\n" +"«якесь_значення (Серії)», таким чином зібравши всі поля у одному наборі\n" +"збірок.\n" +"Передбачено два пов’язаних коригування. Перше визначає назву категорії для\n" +"поля метаданих. Друге є шаблоном, що використовується для визначення " +"способу\n" +"поєднання значення і категорії з метою створення назви збірки.\n" +"Синтаксис першого коригування, sony_collection_renaming_rules, такий:\n" +"{'назва_фільтра_поля':'назва_категорії', 'назва_фільтра':'назва', ...}\n" +"Друге коригування, sony_collection_name_template, є шаблоном. Для його " +"побудови\n" +"використовується та сама мова шаблонів, що і для засобів обробки і шаблонів\n" +"збереження. Це коригування керує способом, у який значення і категорія\n" +"поєднуються з метою визначення назви збірки.\n" +"Передбачено два поля: {category} і {value}. Поле {value} не повинно бути\n" +"порожнім. Поле {category} може бути порожнім. Типово, значення додається\n" +"на початку, потім додається назва категорії у дужках, якщо вона не порожня:\n" +"'{value} {category:|(|)}'\n" +"Приклади: у перших трьох прикладах значення другого коригування вважається\n" +"типовим.\n" +"1: об’єднати три стовпчики серії у один набір збірок. Назви фільтрів " +"стовпчиків\n" +"такі: «series», «#series_1» і «#series_2». 
У дужках нічого не повинно бути.\n" +"Тоді значенням коригування має бути:\n" +"sony_collection_renaming_rules={'series':'', '#series_1':'', " +"'#series_2':''}\n" +"2: використати слово «(Series)» для збірок на основі поля «series» і слово\n" +"«(Tag)» для збірок на основі поля «tags». Відповідне коригування:\n" +"sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}\n" +"3: об’єднати збірки «series» і «#myseries» і додати до назви збірки слово\n" +"«(Series)». Правило перейменування:\n" +"sony_collection_renaming_rules={'series':'Series', '#myseries':'Series'}\n" +"4: те саме, що і у прикладі 2, але замість додавання назви категорії у\n" +"дужках до значення, використати її як префікс і відокремити від значення\n" +"двокрапкою, ось так: «Series: Darkover». Слід змінити шаблон форматування\n" +"назви категорії. Остаточні коригування мають бути такими:\n" +"sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}\n" +"sony_collection_name_template='{category:||: }{value}'" #: /home/kovid/work/calibre/resources/default_tweaks.py:316 msgid "Specify how SONY collections are sorted" @@ -21805,6 +21934,26 @@ msgid "" "[ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ]\n" "Default: empty (no rules), so no collection attributes are named." msgstr "" +"Визначає спосіб впорядкування збірок sony. Це коригування використовується,\n" +"лише якщо керування метаданими є автоматичним. Ви можете визначити, які\n" +"метадані буде використано для впорядкування збірок. Формат коригування\n" +"такий: список полів метаданих, на основі яких створюються збірки, потім\n" +"назва поля метаданих, що містить значення, за яким буде виконано\n" +"впорядковування.\n" +"Приклад: вказаний нижче запис означає, що збірки, побудовані на основі\n" +"дати публікації та міток, буде впорядковано за значенням у нетиповому\n" +"стовпчику «#mydate»; збірки, побудовані на основі поля серій, буде\n" +"впорядковано за значенням «series_index», а всі інші збірки буде\n" +"впорядковано за назвою книги. Якщо поле метаданих збірки не має назви,\n" +"у разі збірки, заснованої на серії, її буде впорядковано за порядком\n" +"книг у серії, всі інші збірки буде впорядковано за назвами книг.\n" +"[(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], " +"'title')]\n" +"Зауважте, що квадратні і круглі дужки має бути використано за правилами\n" +"побудови значення параметра. Синтаксична конструкція має бути такою:\n" +"[ ( [список полів], поле впорядкування ) , ( [ список полів ] , поле " +"впорядкування ) ]\n" +"Типове значення: порожнє (без правил), отже без згадування атрибутів збірки." #: /home/kovid/work/calibre/resources/default_tweaks.py:334 msgid "Control how tags are applied when copying books to another library" @@ -21885,6 +22034,15 @@ msgid "" "level sorts, and if you are seeing a slowdown, reduce the value of this " "tweak." msgstr "" +"Встановіть максимальну кількість рівнів впорядковування, використаних " +"calibre\n" +"для перевпорядковування бібліотеки після виконання певних дій, зокрема " +"пошуку\n" +"або з’єднання пристрою. Додаткові рівні впорядковування погіршують " +"швидкодію.\n" +"Якщо база даних є об’ємною (тисячі книг) це погіршення швидкодії може стати\n" +"значним. Якщо багаторівневе впорядковування вам не потрібне або ви помітили\n" +"значне уповільнення роботи програми, зменшіть значення цього коригування." 
#: /home/kovid/work/calibre/resources/default_tweaks.py:368 msgid "Choose whether dates are sorted using visible fields" @@ -21961,6 +22119,19 @@ msgid "" "Example: locale_for_sorting = 'fr' -- sort using French rules.\n" "Example: locale_for_sorting = 'nb' -- sort using Norwegian rules." msgstr "" +"Встановлення значення цього коригування визначає у примусовому порядку\n" +"впорядковування за правилами певної мови. Може бути корисним, якщо ви\n" +"працюєте з англомовним інтерфейсом calibre, але бажаєте, щоб\n" +"впорядковування виконувалося відповідно до правил вибраної вами мови.\n" +"Встановіть для коригування значення відповідного коду мови за ISO 639-1,\n" +"малими літерами.\n" +"Список підтримуваних локалей розміщено за адресою\n" +"http://publib.boulder.ibm.com/infocenter/iseries/v5r3/topic/nls/rbagsicusorts" +"equencetables.htm\n" +"Типове значення: locale_for_sorting = '' — використовувати мову інтерфейсу " +"calibre\n" +"Приклад: locale_for_sorting = 'fr' — впорядкувати за правилами французької.\n" +"Приклад: locale_for_sorting = 'nb' — впорядкувати за правилами норвезької." #: /home/kovid/work/calibre/resources/default_tweaks.py:405 msgid "Number of columns for custom metadata in the edit metadata dialog" @@ -21995,6 +22166,15 @@ msgid "" "Enter a comma-separated list of custom field lookup names, as in\n" "metadata_edit_custom_column_order = ['#genre', '#mytags', '#etc']" msgstr "" +"Керує порядком, у якому буде показано список нетипових стовпчиків у вікні\n" +"окремого і пакетного редагування. Стовпчики зі списку-значення коригування\n" +"буде показано першими у вказаному вами порядку. Всі інші стовпчики буде\n" +"показано останніми і впорядкованими за абеткою. Зауважте, що за допомогою\n" +"цього коригування не можна змінити розмір віджетів редагування.\n" +"Додавання до цього списку віджетів коментарів може призвести до помилкового\n" +"розташовування елементів у режимі показу двома стовпчиками.\n" +"Приклад:\n" +"metadata_edit_custom_column_order = ['#genre', '#mytags', '#etc']" #: /home/kovid/work/calibre/resources/default_tweaks.py:422 msgid "The number of seconds to wait before sending emails" From 90be86c7771286a195346c7a9d70567a3f30702f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 11:15:32 +0530 Subject: [PATCH 24/74] Fix unable to load plugins from files on GNOME/XFCE desktops --- src/calibre/gui2/preferences/plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py index 20507b4ce1..2e31b529f6 100644 --- a/src/calibre/gui2/preferences/plugins.py +++ b/src/calibre/gui2/preferences/plugins.py @@ -273,7 +273,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): def add_plugin(self): path = choose_files(self, 'add a plugin dialog', _('Add plugin'), - filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False, + filters=[(_('Plugins [.zip files]'), ['zip'])], all_files=False, select_only_single_file=True) if not path: return From bba6317da2800e9af4162b6c437c0ba7e54ada76 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 11:30:41 +0530 Subject: [PATCH 25/74] ... 
--- src/calibre/gui2/preferences/plugins.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py index 2e31b529f6..70a8a82311 100644 --- a/src/calibre/gui2/preferences/plugins.py +++ b/src/calibre/gui2/preferences/plugins.py @@ -20,6 +20,7 @@ from calibre.gui2 import (NONE, error_dialog, info_dialog, choose_files, question_dialog, gprefs) from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.icu import lower +from calibre.constants import iswindows class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{ @@ -272,8 +273,9 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.modify_plugin(op='remove') def add_plugin(self): + info = '' if iswindows else ' [.zip %s]'%_('files') path = choose_files(self, 'add a plugin dialog', _('Add plugin'), - filters=[(_('Plugins [.zip files]'), ['zip'])], all_files=False, + filters=[(_('Plugins') + info, ['zip'])], all_files=False, select_only_single_file=True) if not path: return From 44ed7440f4abf977c2c41bfba86ee3d80d5d3fa2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 12:32:48 +0530 Subject: [PATCH 26/74] ... --- setup/installer/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/installer/__init__.py b/setup/installer/__init__.py index 8374f93e38..d0a6cd6fa3 100644 --- a/setup/installer/__init__.py +++ b/setup/installer/__init__.py @@ -48,7 +48,7 @@ class Push(Command): threads = [] for host in ( r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre', - 'kovid@ox:calibre', + 'kovid@leopard_test:calibre', r'kovid@win7:/cygdrive/c/Users/kovid/calibre', ): rcmd = BASE_RSYNC + EXCLUDES + ['.', host] From 78755c8640645b5e9ee538870bfa39017a447d26 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 14:55:30 +0530 Subject: [PATCH 27/74] ... 
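
The Daily Mirror recipe below switches to auto_cleanup and trims its scraping
rules down to a pair of preprocess_regexps substitutions. In miniature, such
(pattern, function) pairs are applied to the raw HTML before parsing; the
following is a simplified sketch, not BasicNewsRecipe's exact code:

    import re

    preprocess_regexps = [
        (re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: ''),
        (re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: ''),
    ]

    raw = '<title>Top story - mirror.co.uk</title>Advertisement >> body text'
    for pattern, func in preprocess_regexps:
        raw = pattern.sub(func, raw)
    print(raw)  # '<title>Top story </title> body text'
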
---
 recipes/daily_mirror.recipe | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/recipes/daily_mirror.recipe b/recipes/daily_mirror.recipe
index f0d28c72e7..800eaf10e9 100644
--- a/recipes/daily_mirror.recipe
+++ b/recipes/daily_mirror.recipe
@@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     description = 'News as provided by The Daily Mirror - UK'

     __author__ = 'Dave Asbury'
-    # last updated 30/10/11
+    # last updated 26/12/11
     language = 'en_GB'

     cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'

     masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

-    oldest_article = 2
-    max_articles_per_feed = 30
+    oldest_article = 1
+    max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
-    extra_css = '''
-    body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
-    '''

-    keep_only_tags = [
-        dict(name='div',attrs={'id' : 'body-content'})
-    ]

-    remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]

+    auto_cleanup = True
     remove_tags = [
-        dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
-        dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
-        dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
-        dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
+        dict(name='title'),
+        dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
     ]

     preprocess_regexps = [
-        (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match: '')]
+        (re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: ''),
+        (re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]

     feeds = [

@@ -53,5 +45,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
 # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
-    ]
+    ]
+    extra_css = '''
+    body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+    h1{ font-size:18px;}
+    img { display:block}
+    '''
+

From 89ce33ebc9e79ff5fa2266d5f0063ece30950360 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 27 Dec 2011 15:00:26 +0530
Subject: [PATCH 28/74] ...
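
The one-line change below guards generate_masthead() against being handed an
already encoded title. A sketch of the Python 2 failure it avoids (this is
the generic str/unicode pitfall, not calibre-specific code):

    title = u'caf\xe9'
    text = title.encode('utf-8') if isinstance(title, unicode) else title

    # Without the isinstance() guard, .encode('utf-8') on an already
    # encoded str first decodes it as ASCII, which blows up on non-ASCII:
    try:
        text.encode('utf-8')
    except UnicodeDecodeError:
        print('double-encoding a UTF-8 str raises UnicodeDecodeError')
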
--- src/calibre/ebooks/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index a1026e97d6..084b91430f 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -240,7 +240,7 @@ def generate_masthead(title, output_path=None, width=600, height=60): font = ImageFont.truetype(font_path, 48) except: font = ImageFont.truetype(default_font, 48) - text = title.encode('utf-8') + text = title.encode('utf-8') if isinstance(title, unicode) else title width, height = draw.textsize(text, font=font) left = max(int((width - width)/2.), 0) top = max(int((height - height)/2.), 0) From 0e31649305183bc410e7a5fa950dac0ac4654b80 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 27 Dec 2011 18:13:54 +0530 Subject: [PATCH 29/74] NYTimes Global by Krittika Goyal --- recipes/iht.recipe | 79 ++++++--------------- src/calibre/web/feeds/recipes/collection.py | 2 +- 2 files changed, 24 insertions(+), 57 deletions(-) diff --git a/recipes/iht.recipe b/recipes/iht.recipe index 85a87ccba4..bc13a99249 100644 --- a/recipes/iht.recipe +++ b/recipes/iht.recipe @@ -1,63 +1,30 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Derry FitzGerald' -''' -iht.com -''' -import re - from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ptempfile import PersistentTemporaryFile +class NYTimesGlobal(BasicNewsRecipe): + title = u'NY Times Global' + language = 'en' + __author__ = 'Krittika Goyal' + oldest_article = 1 #days + max_articles_per_feed = 25 + use_embedded_content = False -class InternationalHeraldTribune(BasicNewsRecipe): - title = u'The International Herald Tribune' - __author__ = 'Derry FitzGerald' - language = 'en' - - oldest_article = 1 - max_articles_per_feed = 30 no_stylesheets = True + auto_cleanup = True - remove_tags = [dict(name='div', attrs={'class':['footer','header']}), - dict(name=['form'])] - preprocess_regexps = [ - (re.compile(r' - - + + {% endif %} {% block body %} {% endblock %} @@ -58,13 +58,14 @@
    -
    - - - - -
    -
    + +
    + + + + +
    +
    {% endblock %} From e1498bfa3e3e2c28566ec1b42d1a2748eabf4493 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 2 Jan 2012 15:49:03 +0530 Subject: [PATCH 67/74] Fix #910523 (REGEXP "replace with" field drops spaces) --- src/calibre/gui2/convert/__init__.py | 8 ++++++-- src/calibre/gui2/convert/search_and_replace.py | 15 +++------------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py index 6cd6f8872d..73b478ac47 100644 --- a/src/calibre/gui2/convert/__init__.py +++ b/src/calibre/gui2/convert/__init__.py @@ -43,6 +43,9 @@ class Widget(QWidget): ICON = I('config.png') HELP = '' COMMIT_NAME = None + # If True, leading and trailing spaces are removed from line and text edit + # fields + STRIP_TEXT_FIELDS = True changed_signal = pyqtSignal() set_help = pyqtSignal(object) @@ -123,7 +126,6 @@ class Widget(QWidget): if name in getattr(recs, 'disabled_options', []): gui_opt.setDisabled(True) - def get_value(self, g): from calibre.gui2.convert.xpath_wizard import XPathEdit from calibre.gui2.convert.regex_builder import RegexEdit @@ -135,7 +137,9 @@ class Widget(QWidget): return g.value() elif isinstance(g, (QLineEdit, QTextEdit)): func = getattr(g, 'toPlainText', getattr(g, 'text', None))() - ans = unicode(func).strip() + ans = unicode(func) + if self.STRIP_TEXT_FIELDS: + ans = ans.strip() if not ans: ans = None return ans diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index 407e7922e7..b9e2644008 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -6,8 +6,6 @@ __docformat__ = 'restructuredtext en' import re -from PyQt4.Qt import QLineEdit, QTextEdit - from calibre.gui2.convert.search_and_replace_ui import Ui_Form from calibre.gui2.convert import Widget from calibre.gui2 import error_dialog @@ -18,6 +16,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form): HELP = _('Modify the document text and structure using user defined patterns.') COMMIT_NAME = 'search_and_replace' ICON = I('search.png') + STRIP_TEXT_FIELDS = False def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, @@ -74,13 +73,5 @@ class SearchAndReplaceWidget(Widget, Ui_Form): _('Invalid regular expression: %s')%err, show=True) return False return True - - def get_vaule(self, g): - if isinstance(g, (QLineEdit, QTextEdit)): - func = getattr(g, 'toPlainText', getattr(g, 'text', None))() - ans = unicode(func) - if not ans: - ans = None - return ans - else: - return Widget.get_value(self, g) + + From cd9412db5189777c4424c2c2eb2947457ea2f231 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Jan 2012 08:04:27 +0530 Subject: [PATCH 68/74] Fix Tagesspiegel --- recipes/tagesspiegel.recipe | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/recipes/tagesspiegel.recipe b/recipes/tagesspiegel.recipe index b71a34aa85..3728f8306c 100644 --- a/recipes/tagesspiegel.recipe +++ b/recipes/tagesspiegel.recipe @@ -33,17 +33,7 @@ class TagesspiegelRSS(BasicNewsRecipe): no_javascript = True remove_empty_feeds = True encoding = 'utf-8' - - keep_only_tags = dict(name='div', attrs={'class':["hcf-article"]}) - remove_tags = [ - dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),dict(name='button'), - dict(name='div', attrs={'class':["hcf-jump-to-comments","hcf-clear","hcf-magnify hcf-media-control", - "hcf-socials-widgets 
hcf-socials-top","hcf-socials-widgets hcf-socials-bottom"] }), - dict(name='span', attrs={'class':["hcf-mainsearch",] }), - dict(name='ul', attrs={'class':["hcf-tools"]}), - dict(name='ul', attrs={'class': re.compile('hcf-services')}) - ] - + auto_cleanup = True def parse_index(self): soup = self.index_to_soup('http://www.tagesspiegel.de/zeitung/') From c58a9c062f88343cd1979804a7cbf9b70eafc678 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Jan 2012 08:11:33 +0530 Subject: [PATCH 69/74] Diario Rio Negro by Darko Miletic. Fixes #911055 (new recipe for diario rio negro) --- recipes/icons/rionegro.png | Bin 0 -> 817 bytes recipes/rionegro.recipe | 65 +++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 recipes/icons/rionegro.png create mode 100644 recipes/rionegro.recipe diff --git a/recipes/icons/rionegro.png b/recipes/icons/rionegro.png new file mode 100644 index 0000000000000000000000000000000000000000..2ae2f848adb5aa910843bda30cf30bedfaa6612e GIT binary patch literal 817 zcmV-11J3-3P)Mu?h70x{~rs}kj4h=Hg9A_tZD z8)NW48e)hSV*EK6OblQ^4tejw?5t;KCj+^TPQN9@@8r37m!Hg9sCz}<7`2;f^004~% zs34!;Re`cHJzYbauzEJV|Al9AiIkuS5*3dFW{SIY&>j#BM` zYG5+4EJH-mef6ezwuKsV2w0s?ck~cfxDKYLF&v_(^x~yFGRp5PSG_#bG#!Z1qk^?C zEk(gxm##2QC%?anhWpbIm)lw2;J$H-!8kA?cAe(_8*aZFKM=;>x7B7ALm+;<gu>QS-j0rdzd2ls}(RGk1<)vdQ1UyjP?We4snzPXV9zgJjC-KVqvjq+4hLMwY$%mlh@=;lPLHy@S)I zH02g*dmeW=4%USX5W1fz4@o@VCN?$&66tjMfnLR?u{1-N;J+c5^-QT;A*7lMBIyoE zzzR{S5hSs*eWIiNwvPZps*YSyAu{JGD9X;_D=Xmg^nj2dNTpCyvIWG(DgYH)l(!n1 zl2}6wm1}-?g+Hlg>n5~nC5T<5j))bag@>K&k~mhY4U|C${``eDE4}eH4&vjZu3rWT zO9MWHkncaF-nUZEbADeH(CZn^Oivh-iAmSR4l3QgJD?q;r-p2qN{y-q8V#bOhcL*2 vh%VxAD8&XXY_;R@@xT8+IZO~H`2F=0B$kV+#KfB{00000NkvXXu0mjfDEoIS literal 0 HcmV?d00001 diff --git a/recipes/rionegro.recipe b/recipes/rionegro.recipe new file mode 100644 index 0000000000..d224be7152 --- /dev/null +++ b/recipes/rionegro.recipe @@ -0,0 +1,65 @@ +__license__ = 'GPL v3' +__copyright__ = '2012, Darko Miletic ' +''' +www.rionegro.com.ar +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class RioNegro(BasicNewsRecipe): + title = 'Diario Rio Negro' + __author__ = 'Darko Miletic' + description = 'Noticias desde la Patagonia Argentina y el resto del mundo' + publisher = 'Editorial Rio Negro SA.' 
+    category              = 'news, politics, Argentina'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'es_AR'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.rionegro.com.ar/diario/imagenes/logorn.gif'
+    extra_css             = """
+                               body{font-family: Arial,Helvetica,sans-serif }
+                               img{display:block}
+                               h1 {font-size: 0.89em; color: red}
+                               h2 {font-family: Georgia,"Times New Roman",Times,serif; font-size: 1.8em}
+                               h3 {font-family: Georgia,"Times New Roman",Times,serif; border-bottom: 2px solid gray}
+                            """
+
+    conversion_options = {
+          'comment'   : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }
+
+    remove_tags = [
+         dict(name=['meta','link','iframe','object','embed'])
+        ,dict(name='div', attrs={'class':'logo'})
+    ]
+    keep_only_tags=[dict(attrs={'class':'nota'})]
+    remove_attributes=['lang']
+
+
+    feeds = [
+          (u'Argentina'         , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9532')
+         ,(u'El Mundo'          , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9533')
+         ,(u'Carta de lectores' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9538')
+         ,(u'Columnistas'       , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9539')
+         ,(u'Domingo a Domingo' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9541')
+         ,(u'Editorial'         , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9542')
+         ,(u'Deportes'          , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9522')
+         ,(u'Espectaculos'      , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9523')
+         ,(u'Sociedad'          , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9574')
+         ,(u'Policiales'        , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9525')
+         ,(u'Municipales'       , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9862')
+         ,(u'Region'            , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9701')
+    ]
+
+    def print_version(self, url):
+        idart_raw = url.rpartition('idart=')[2]
+        idart = idart_raw.rpartition('&')[0]
+        return 'http://www.rionegro.com.ar/diario/rn/print.aspx?idArt=' + idart + '&tipo=2'

From fe315652835dd6a6d96db74791aef664b10acf39 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 3 Jan 2012 08:37:28 +0530
Subject: [PATCH 70/74] EPUB Input: When converting a file that has entries in
 the manifest that do not exist, remove them, instead of aborting the
 conversion.
Fixes #910933 (Failing to convert ePub to any format) --- src/calibre/ebooks/oeb/reader.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 0337d47f92..6b2cf798ea 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -175,13 +175,27 @@ class OEBReader(object): manifest = self.oeb.manifest known = set(manifest.hrefs) unchecked = set(manifest.values()) + cdoc = OEB_DOCS|OEB_STYLES + invalid = set() while unchecked: new = set() for item in unchecked: + data = None + if (item.media_type in cdoc or + item.media_type[-4:] in ('/xml', '+xml')): + try: + data = item.data + except: + self.oeb.log.exception(u'Failed to read from manifest ' + u'entry with id: %s, ignoring'%item.id) + invalid.add(item) + continue + if data is None: + continue + if (item.media_type in OEB_DOCS or - item.media_type[-4:] in ('/xml', '+xml')) and \ - item.data is not None: - hrefs = [r[2] for r in iterlinks(item.data)] + item.media_type[-4:] in ('/xml', '+xml')): + hrefs = [r[2] for r in iterlinks(data)] for href in hrefs: href, _ = urldefrag(href) if not href: @@ -197,7 +211,7 @@ class OEBReader(object): new.add(href) elif item.media_type in OEB_STYLES: try: - urls = list(cssutils.getUrls(item.data)) + urls = list(cssutils.getUrls(data)) except: urls = [] for url in urls: @@ -231,6 +245,9 @@ class OEBReader(object): added = manifest.add(id, href, media_type) unchecked.add(added) + for item in invalid: + self.oeb.manifest.remove(item) + def _manifest_from_opf(self, opf): manifest = self.oeb.manifest for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): From 1b6e034512d00a256f273a0e20c9bccbab66883f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Jan 2012 15:40:39 +0530 Subject: [PATCH 71/74] ... 
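
The driver tweak below turns The Book's Windows storage identifier into a
list with two spellings, presumably to cover hardware revisions that report
slightly different strings. A simplified stand-in for the substring match
calibre's USB mass storage detection performs against the identifier Windows
reports (the real logic lives in the USBMS base classes):

    WINDOWS_MAIN_MEM = ['_FILE-STOR_GADGE', 'FILE-STOR_GADGET']

    def matches(windows_id, candidates):
        wid = windows_id.upper()
        return any(c.upper() in wid for c in candidates)

    print(matches('USBSTOR\\DISK&VEN_FILE-STOR_GADGET&REV_0319', WINDOWS_MAIN_MEM))  # True
    print(matches('USBSTOR\\DISK&VEN__FILE-STOR_GADGE', WINDOWS_MAIN_MEM))           # True
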
--- src/calibre/devices/hanvon/driver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/hanvon/driver.py b/src/calibre/devices/hanvon/driver.py index 71d73a4401..c2e9f7249c 100644 --- a/src/calibre/devices/hanvon/driver.py +++ b/src/calibre/devices/hanvon/driver.py @@ -50,7 +50,8 @@ class THEBOOK(N516): BCD = [0x399] MAIN_MEMORY_VOLUME_LABEL = 'The Book Main Memory' EBOOK_DIR_MAIN = 'My books' - WINDOWS_CARD_A_MEM = '_FILE-STOR_GADGE' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['_FILE-STOR_GADGE', + 'FILE-STOR_GADGET'] class LIBREAIR(N516): name = 'Libre Air Driver' From a3f353853a0b243be8ede49de9ef6275c3bed694 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Jan 2012 15:42:57 +0530 Subject: [PATCH 72/74] Change upload installers code to work via a staging server with more upload bandwidth than my current DSL connection --- setup/commands.py | 12 +- setup/hosting.py | 459 ++++++++++++++++++++++++++++++++++++++++++ setup/publish.py | 2 +- setup/upload.py | 493 +++++++++++++--------------------------------- 4 files changed, 602 insertions(+), 364 deletions(-) create mode 100644 setup/hosting.py diff --git a/setup/commands.py b/setup/commands.py index e8ac6d8e42..9fbc048254 100644 --- a/setup/commands.py +++ b/setup/commands.py @@ -16,8 +16,8 @@ __all__ = [ 'sdist', 'manual', 'tag_release', 'pypi_register', 'pypi_upload', 'upload_to_server', - 'upload_user_manual', 'upload_to_mobileread', 'upload_demo', - 'upload_to_sourceforge', 'upload_to_google_code', 'reupload', + 'upload_installers', + 'upload_user_manual', 'upload_demo', 'reupload', 'linux32', 'linux64', 'linux', 'linux_freeze', 'osx32_freeze', 'osx', 'rsync', 'push', 'win32_freeze', 'win32', 'win', @@ -65,14 +65,12 @@ stage4 = Stage4() stage5 = Stage5() publish = Publish() -from setup.upload import UploadUserManual, UploadInstallers, UploadDemo, \ - UploadToServer, UploadToSourceForge, UploadToGoogleCode, ReUpload +from setup.upload import (UploadUserManual, UploadDemo, UploadInstallers, + UploadToServer, ReUpload) upload_user_manual = UploadUserManual() -upload_to_mobileread = UploadInstallers() upload_demo = UploadDemo() upload_to_server = UploadToServer() -upload_to_sourceforge = UploadToSourceForge() -upload_to_google_code = UploadToGoogleCode() +upload_installers = UploadInstallers() reupload = ReUpload() from setup.installer import Rsync, Push diff --git a/setup/hosting.py b/setup/hosting.py new file mode 100644 index 0000000000..d86cef2d31 --- /dev/null +++ b/setup/hosting.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os, time, sys, traceback, subprocess, urllib2, re, base64, httplib +from argparse import ArgumentParser, FileType +from subprocess import check_call +from tempfile import NamedTemporaryFile#, mkdtemp +from collections import OrderedDict + +import mechanize +from lxml import html + +def login_to_google(username, password): + br = mechanize.Browser() + br.addheaders = [('User-agent', + 'Mozilla/5.0 (X11; Linux x86_64; rv:9.0) Gecko/20100101 Firefox/9.0')] + br.set_handle_robots(False) + br.open('https://accounts.google.com/ServiceLogin?service=code') + br.select_form(nr=0) + br.form['Email'] = username + br.form['Passwd'] = password + raw = br.submit().read() + if b'Account overview - Account Settings' not in raw: + raise 
ValueError(('Failed to login to google with credentials: %s %s' + '\nGoogle sometimes requires verification when logging in from a ' + 'new IP address. Use lynx to login and supply the verification.') + %(username, password)) + return br + +class ReadFileWithProgressReporting(file): # {{{ + + def __init__(self, path, mode='rb'): + file.__init__(self, path, mode) + self.seek(0, os.SEEK_END) + self._total = self.tell() + self.seek(0) + self.start_time = time.time() + + def __len__(self): + return self._total + + def read(self, size): + data = file.read(self, size) + if data: + self.report_progress(len(data)) + return data + + def report_progress(self, size): + sys.stdout.write(b'\x1b[s') + sys.stdout.write(b'\x1b[K') + frac = float(self.tell())/self._total + mb_pos = self.tell()/float(1024**2) + mb_tot = self._total/float(1024**2) + kb_pos = self.tell()/1024.0 + kb_rate = kb_pos/(time.time()-self.start_time) + bit_rate = kb_rate * 1024 + eta = int((self._total - self.tell())/bit_rate) + 1 + eta_m, eta_s = eta / 60, eta % 60 + sys.stdout.write( + ' %.1f%% %.1f/%.1fMB %.1f KB/sec %d minutes, %d seconds left'%( + frac*100, mb_pos, mb_tot, kb_rate, eta_m, eta_s)) + sys.stdout.write(b'\x1b[u') + if self.tell() >= self._total: + sys.stdout.write('\n') + t = int(time.time() - self.start_time) + 1 + print ('Upload took %d minutes and %d seconds at %.1f KB/sec' % ( + t/60, t%60, kb_rate)) + sys.stdout.flush() +# }}} + +class Base(object): # {{{ + + def __init__(self): + self.d = os.path.dirname + self.j = os.path.join + self.a = os.path.abspath + self.b = os.path.basename + self.s = os.path.splitext + self.e = os.path.exists + + def info(self, *args, **kwargs): + print(*args, **kwargs) + sys.stdout.flush() + + def warn(self, *args, **kwargs): + print('\n'+'_'*20, 'WARNING','_'*20) + print(*args, **kwargs) + print('_'*50) + sys.stdout.flush() + +#}}} + +class GoogleCode(Base):# {{{ + + def __init__(self, + # A mapping of filenames to file descriptions. The descriptions are + # used to populate the description field for the upload on google + # code + files, + + # The unix name for the application. + appname, + + # The version being uploaded + version, + + # Google account username + username, + + # Googlecode.com password + password, + + # Google account password + gmail_password, + + # The name of the google code project we are uploading to + gc_project, + + # Server to which to upload the mapping of file names to google + # code URLs. If not None, upload is performed via shelling out to + # ssh, so you must have ssh-agent setup with the authenticated key + # and ssh agent forwarding enabled + gpaths_server=None, + # The path on gpaths_server to which to upload the mapping data + gpaths=None, + + # If True, files are replaced, otherwise existing files are skipped + reupload=False, + + # The pattern to match filenames for the files being uploaded and + # extract version information from them. 
Must have a named group + # named version + filename_pattern=r'{appname}-(?:portable-)?(?P.+?)(?:-(?:i686|x86_64|32bit|64bit))?\.(?:zip|exe|msi|dmg|tar\.bz2|tar\.xz|txz|tbz2)' + + ): + self.username, self.password, = username, password + self.gmail_password, self.gc_project = gmail_password, gc_project + self.reupload, self.files, self.version = reupload, files, version + self.gpaths, self.gpaths_server = gpaths, gpaths_server + + self.upload_host = '%s.googlecode.com'%gc_project + self.files_list = 'http://code.google.com/p/%s/downloads/list'%gc_project + self.delete_url = 'http://code.google.com/p/%s/downloads/delete?name=%%s'%gc_project + + self.filename_pat = re.compile(filename_pattern.format(appname=appname)) + for x in self.files: + if self.filename_pat.match(os.path.basename(x)) is None: + raise ValueError(('The filename %s does not match the ' + 'filename pattern')%os.path.basename(x)) + + def upload_one(self, fname, retries=2): + self.info('\nUploading', fname) + typ = 'Type-' + ('Source' if fname.endswith('.xz') else 'Archive' if + fname.endswith('.zip') else 'Installer') + ext = os.path.splitext(fname)[1][1:] + op = 'OpSys-'+{'msi':'Windows','zip':'Windows', + 'dmg':'OSX','bz2':'Linux','xz':'All'}[ext] + desc = self.files[fname] + start = time.time() + for i in range(retries): + try: + path = self.upload(os.path.abspath(fname), desc, + labels=[typ, op, 'Featured'], retry=100) + except KeyboardInterrupt: + raise SystemExit(1) + except: + traceback.print_exc() + print ('\nUpload failed, trying again in 30 secs.', + '%d retries left.'%(retries-1)) + time.sleep(30) + else: + break + self.info('Uploaded to:', path, 'in', int(time.time() - start), + 'seconds') + return path + + def re_upload(self): + fnames = {os.path.basename(x):x for x in self.files} + existing = self.old_files.intersection(set(fnames)) + br = self.login_to_google() + for x, src in fnames.iteritems(): + if not os.access(src, os.R_OK): + continue + if x in existing: + self.info('Deleting', x) + br.open(self.delete_url%x) + br.select_form(predicate=lambda y: 'delete.do' in y.action) + br.form.find_control(name='delete') + br.submit(name='delete') + self.upload_one(src) + + def __call__(self): + self.paths = {} + self.old_files = self.get_old_files() + if self.reupload: + return self.re_upload() + + for fname in self.files: + bname = os.path.basename(fname) + if bname in self.old_files: + path = 'http://%s.googlecode.com/files/%s'%(self.gc_project, + bname) + self.info( + '%s already uploaded, skipping. 
Assuming URL is: %s'%( + bname, path)) + self.old_files.remove(bname) + else: + path = self.upload_one(fname) + self.paths[bname] = path + self.info('Updating path map') + for k, v in self.paths.iteritems(): + self.info('\t%s => %s'%(k, v)) + if self.gpaths and self.gpaths_server: + raw = subprocess.Popen(['ssh', self.gpaths_server, 'cat', self.gpaths], + stdout=subprocess.PIPE).stdout.read() + paths = eval(raw) if raw else {} + paths.update(self.paths) + rem = [x for x in paths if self.version not in x] + for x in rem: paths.pop(x) + raw = ['%r : %r,'%(k, v) for k, v in paths.items()] + raw = '{\n\n%s\n\n}\n'%('\n'.join(raw)) + with NamedTemporaryFile() as t: + t.write(raw) + t.flush() + check_call(['scp', t.name, '%s:%s'%(self.gpaths_server, + self.gpaths)]) + if self.old_files: + self.br = self.login_to_google() + self.delete_old_files() + + def login_to_google(self): + self.info('Logging into Google') + return login_to_google(self.username, self.gmail_password) + + def get_files_hosted_by_google_code(self): + self.info('Getting existing files in google code:', self.gc_project) + raw = urllib2.urlopen(self.files_list).read() + root = html.fromstring(raw) + ans = {} + for a in root.xpath('//td[@class="vt id col_0"]/a[@href]'): + ans[a.text.strip()] = a.get('href') + return ans + + def get_old_files(self): + ans = set() + for fname in self.get_files_hosted_by_google_code(): + m = self.filename_pat.match(fname) + if m is not None: + ans.add(fname) + return ans + + def delete_old_files(self): + if not self.old_files: + return + self.info('Deleting old files from Google Code...') + for fname in self.old_files: + self.info('\tDeleting', fname) + self.br.open(self.delete_url%fname) + self.br.select_form(predicate=lambda x: 'delete.do' in x.action) + self.br.form.find_control(name='delete') + self.br.submit(name='delete') + + def encode_upload_request(self, fields, file_path): + BOUNDARY = '----------Googlecode_boundary_reindeer_flotilla' + + body = [] + + # Add the metadata about the upload first + for key, value in fields: + body.extend( + ['--' + BOUNDARY, + 'Content-Disposition: form-data; name="%s"' % key, + '', + value, + ]) + + # Now add the file itself + file_name = os.path.basename(file_path) + with open(file_path, 'rb') as f: + file_content = f.read() + + body.extend( + ['--' + BOUNDARY, + 'Content-Disposition: form-data; name="filename"; filename="%s"' + % file_name, + # The upload server determines the mime-type, no need to set it. 
+ 'Content-Type: application/octet-stream', + '', + file_content, + ]) + + # Finalize the form body + body.extend(['--' + BOUNDARY + '--', '']) + body = [x.encode('ascii') if isinstance(x, unicode) else x for x in + body] + + return ('multipart/form-data; boundary=%s' % BOUNDARY, + b'\r\n'.join(body)) + + def upload(self, fname, desc, labels=[], retry=0): + form_fields = [('summary', desc)] + form_fields.extend([('label', l.strip()) for l in labels]) + + content_type, body = self.encode_upload_request(form_fields, fname) + upload_uri = '/files' + auth_token = base64.b64encode('%s:%s'% (self.username, self.password)) + headers = { + 'Authorization': 'Basic %s' % auth_token, + 'User-Agent': 'googlecode.com uploader v1', + 'Content-Type': content_type, + } + + with NamedTemporaryFile(delete=False) as f: + f.write(body) + + try: + body = ReadFileWithProgressReporting(f.name) + server = httplib.HTTPSConnection(self.upload_host) + server.request('POST', upload_uri, body, headers) + resp = server.getresponse() + server.close() + finally: + os.remove(f.name) + + if resp.status == 201: + return resp.getheader('Location') + + print ('Failed to upload with code %d and reason: %s'%(resp.status, + resp.reason)) + if retry < 1: + print ('Retrying in 5 seconds....') + time.sleep(5) + return self.upload(fname, desc, labels=labels, retry=retry+1) + raise Exception('Failed to upload '+fname) + + +# }}} + +class SourceForge(Base): # {{{ + + def __init__(self, files, project, version, username, replace=False): + self.username, self.project, self.version = username, project, version + self.base = '/home/frs/project/c/ca/'+project + self.rdir = self.base + '/' + version + self.files = files + + def __call__(self): + for x in self.files: + start = time.time() + self.info('Uploading', x) + for i in range(5): + try: + check_call(['rsync', '-h', '-z', '--progress', '-e', 'ssh -x', x, + '%s,%s@frs.sourceforge.net:%s'%(self.username, self.project, + self.rdir+'/')]) + except KeyboardInterrupt: + raise SystemExit(1) + except: + print ('\nUpload failed, trying again in 30 seconds') + time.sleep(30) + else: + break + print ('Uploaded in', int(time.time() - start), 'seconds\n\n') + +# }}} + +# CLI {{{ +def cli_parser(): + epilog='Copyright Kovid Goyal 2012' + + p = ArgumentParser( + description='Upload project files to a hosting service automatically', + epilog=epilog + ) + a = p.add_argument + a('appname', help='The name of the application, all files to' + ' upload should begin with this name') + a('version', help='The version of the application, all files to' + ' upload should contain this version') + a('file_map', type=FileType('rb'), + help='A file containing a mapping of files to be uploaded to ' + 'descriptions of the files. The descriptions will be visible ' + 'to users trying to get the file from the hosting service. ' + 'The format of the file is filename: description, with one per ' + 'line. 
filename can be a path to the file relative to the current ' + 'directory.') + a('--replace', action='store_true', default=False, + help='If specified, existing files are replaced, otherwise ' + 'they are skipped.') + + subparsers = p.add_subparsers(help='Where to upload to', dest='service', + title='Service', description='Hosting service to upload to') + gc = subparsers.add_parser('googlecode', help='Upload to googlecode', + epilog=epilog) + sf = subparsers.add_parser('sourceforge', help='Upload to sourceforge', + epilog=epilog) + a = gc.add_argument + + a('project', + help='The name of the project on google code we are uploading to') + a('username', + help='Username to log into your google account') + a('password', + help='Password to log into your google account') + a('gc_password', + help='Password for google code hosting.' + ' Get it from http://code.google.com/hosting/settings') + + a('--path-map-server', + help='A server to which the mapping of filenames to googlecode ' + 'URLs will be uploaded. The upload happens via ssh, so you must ' + 'have a working ssh agent') + a('--path-map-location', + help='Path on the server where the path map is placed.') + + a = sf.add_argument + a('project', + help='The name of the project on sourceforge we are uploading to') + a('username', + help='Sourceforge username') + + return p + +def main(args=None): + cli = cli_parser() + args = cli.parse_args(args) + files = {} + with args.file_map as f: + for line in f: + fname, _, desc = line.partition(':') + fname, desc = fname.strip(), desc.strip() + if fname and desc: + files[fname] = desc + + ofiles = OrderedDict() + for x in sorted(files, key=lambda x:os.stat(x).st_size, reverse=True): + ofiles[x] = files[x] + + if args.service == 'googlecode': + gc = GoogleCode(ofiles, args.appname, args.version, args.username, + args.gc_password, args.password, args.project, + gpaths_server=args.path_map_server, + gpaths=args.path_map_location, reupload=args.replace) + gc() + elif args.service == 'sourceforge': + sf = SourceForge(ofiles, args.project, args.version, args.username, + replace=args.replace) + sf() + +if __name__ == '__main__': + main() +# }}} + diff --git a/setup/publish.py b/setup/publish.py index 26769f271e..72bf6c9cbb 100644 --- a/setup/publish.py +++ b/setup/publish.py @@ -45,7 +45,7 @@ class Stage3(Command): class Stage4(Command): description = 'Stage 4 of the publish process' - sub_commands = ['upload_to_sourceforge', 'upload_to_google_code'] + sub_commands = ['upload_installers'] class Stage5(Command): diff --git a/setup/upload.py b/setup/upload.py index fae0f1a3cc..d1ab1f3c85 100644 --- a/setup/upload.py +++ b/setup/upload.py @@ -5,12 +5,15 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, \ - glob, stat, sys +import os, re, subprocess, hashlib, shutil, glob, stat, sys from subprocess import check_call from tempfile import NamedTemporaryFile, mkdtemp from zipfile import ZipFile +if __name__ == '__main__': + d = os.path.dirname + sys.path.insert(0, d(d(os.path.abspath(__file__)))) + from setup import Command, __version__, installer_name, __appname__ PREFIX = "/var/www/calibre-ebook.com" @@ -19,8 +22,9 @@ BETAS = DOWNLOADS +'/betas' USER_MANUAL = '/var/www/localhost/htdocs/' HTML2LRF = "calibre/ebooks/lrf/html/demo" TXT2LRF = "src/calibre/ebooks/lrf/txt/demo" -MOBILEREAD = 'ftp://dev.mobileread.com/calibre/' - +STAGING_HOST = '67.207.135.179' +STAGING_USER = 'root' 
+STAGING_DIR = '/root/staging'

 def installers():
     installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2')))
@@ -47,10 +51,10 @@ class ReUpload(Command): # {{{

     description = 'Re-upload any installers present in dist/'

-    sub_commands = ['upload_to_google_code', 'upload_to_sourceforge']
+    sub_commands = ['upload_installers']

     def pre_sub_commands(self, opts):
-        opts.re_upload = True
+        opts.replace = True

     def run(self, opts):
         for x in installers():
@@ -58,371 +62,91 @@ class ReUpload(Command): # {{{
             os.remove(x)
 # }}}

-class ReadFileWithProgressReporting(file): # {{{
-
-    def __init__(self, path, mode='rb'):
-        file.__init__(self, path, mode)
-        self.seek(0, os.SEEK_END)
-        self._total = self.tell()
-        self.seek(0)
-        self.start_time = time.time()
-
-    def __len__(self):
-        return self._total
-
-    def read(self, size):
-        data = file.read(self, size)
-        if data:
-            self.report_progress(len(data))
-        return data
-
-    def report_progress(self, size):
-        sys.stdout.write(b'\x1b[s')
-        sys.stdout.write(b'\x1b[K')
-        frac = float(self.tell())/self._total
-        mb_pos = self.tell()/float(1024**2)
-        mb_tot = self._total/float(1024**2)
-        kb_pos = self.tell()/1024.0
-        kb_rate = kb_pos/(time.time()-self.start_time)
-        bit_rate = kb_rate * 1024
-        eta = int((self._total - self.tell())/bit_rate) + 1
-        eta_m, eta_s = eta / 60, eta % 60
-        sys.stdout.write(
-            ' %.1f%% %.1f/%.1fMB %.1f KB/sec %d minutes, %d seconds left'%(
-            frac*100, mb_pos, mb_tot, kb_rate, eta_m, eta_s))
-        sys.stdout.write(b'\x1b[u')
-        if self.tell() >= self._total:
-            sys.stdout.write('\n')
-            t = int(time.time() - self.start_time) + 1
-            print ('Upload took %d minutes and %d seconds at %.1f KB/sec' % (
-                t/60, t%60, kb_rate))
-        sys.stdout.flush()
-# }}}
-
-class UploadToGoogleCode(Command): # {{{
-
-    USERNAME = 'kovidgoyal'
-    # Password can be gotten by going to
-    # http://code.google.com/hosting/settings
-    # while logged into gmail
+# Data {{{
+def get_google_data():
     PASSWORD_FILE = os.path.expanduser('~/.googlecodecalibre')
     OFFLINEIMAP = os.path.expanduser('~/work/kde/conf/offlineimap/rc')
-    GPATHS = '/var/www/status.calibre-ebook.com/googlepaths'
-    # If you change this, remember to change the default URL used by
-    # http://calibre-ebook.com as well
-    GC_PROJECT = 'calibre-ebook-ii'
-    UPLOAD_HOST = '%s.googlecode.com'%GC_PROJECT
-    FILES_LIST = 'http://code.google.com/p/%s/downloads/list'%GC_PROJECT
-    DELETE_URL = 'http://code.google.com/p/%s/downloads/delete?name=%%s'%GC_PROJECT
+    gc_password = open(PASSWORD_FILE).read().strip()
+    raw = open(OFFLINEIMAP).read()
+    pw = re.search(r'(?s)remoteuser = .*@gmail.com.*?remotepass = (\S+)',
+            raw).group(1).strip()
+    return {
+        'username':'kovidgoyal@gmail.com', 'password':pw, 'gc_password':gc_password,
+        'path_map_server':'root@kovidgoyal.net',
+        'path_map_location':'/var/www/status.calibre-ebook.com/googlepaths',
+        'project':'calibre-ebook-ii'
+    }

-    def add_options(self, parser):
-        parser.add_option('--re-upload', default=False, action='store_true',
-                help='Re-upload all installers currently in dist/')
+def get_sourceforge_data():
+    return {'username':'kovidgoyal', 'project':'calibre'}

-    def re_upload(self):
-        fnames = set([os.path.basename(x) for x in installers() if not
-            x.endswith('.tar.xz') and os.path.exists(x)])
-        existing = set(self.old_files.keys()).intersection(fnames)
-        br = self.login_to_gmail()
-        for x in fnames:
-            src = os.path.join('dist', x)
-            if not os.access(src, os.R_OK):
-                continue
-            if x in existing:
-                self.info('Deleting', x)
-                br.open(self.DELETE_URL%x)
-                br.select_form(predicate=lambda y: 'delete.do' in y.action)
-                br.form.find_control(name='delete')
-                br.submit(name='delete')
-            self.upload_one(src)
+def send_data(loc):
+    subprocess.check_call(['rsync', '-r', '-z', '-h', '--progress', '-e', 'ssh -x',
+        loc+'/', '%s@%s:%s'%(STAGING_USER, STAGING_HOST, STAGING_DIR)])

-    def upload_one(self, fname):
-        self.info('\nUploading', fname)
-        typ = 'Type-' + ('Source' if fname.endswith('.xz') else 'Archive' if
-                fname.endswith('.zip') else 'Installer')
-        ext = os.path.splitext(fname)[1][1:]
-        op = 'OpSys-'+{'msi':'Windows','zip':'Windows',
-                'dmg':'OSX','bz2':'Linux','xz':'All'}[ext]
-        desc = installer_description(fname)
-        start = time.time()
-        for i in range(5):
-            try:
-                path = self.upload(os.path.abspath(fname), desc,
-                        labels=[typ, op, 'Featured'])
-            except KeyboardInterrupt:
-                raise SystemExit(1)
-            except:
-                import traceback
-                traceback.print_exc()
-                print ('\nUpload failed, trying again in 30 secs')
-                time.sleep(30)
-            else:
-                break
-        self.info('Uploaded to:', path, 'in', int(time.time() - start),
-                'seconds')
-        return path
+def gc_cmdline(ver, gdata):
+    return [__appname__, ver, 'fmap', 'googlecode',
+        gdata['project'], gdata['username'], gdata['password'],
+        gdata['gc_password'], '--path-map-server',
+        gdata['path_map_server'], '--path-map-location',
+        gdata['path_map_location']]

-    def run(self, opts):
-        self.opts = opts
-        self.password = open(self.PASSWORD_FILE).read().strip()
-        self.paths = {}
-        self.old_files = self.get_files_hosted_by_google_code()
+def sf_cmdline(ver, sdata):
+    return [__appname__, ver, 'fmap', 'sourceforge', sdata['project'],
+        sdata['username']]

-        if opts.re_upload:
-            return self.re_upload()
-
-        for fname in installers():
-            bname = os.path.basename(fname)
-            if bname in self.old_files:
-                path = 'http://%s.googlecode.com/files/%s'%(self.GC_PROJECT,
-                        bname)
-                self.info(
-                    '%s already uploaded, skipping. Assuming URL is: %s'%(
-                        bname, path))
-                self.old_files.pop(bname)
-            else:
-                path = self.upload_one(fname)
-            self.paths[bname] = path
-        self.info('Updating path map')
-        self.info(repr(self.paths))
-        raw = subprocess.Popen(['ssh', 'divok', 'cat', self.GPATHS],
-                stdout=subprocess.PIPE).stdout.read()
-        paths = eval(raw)
-        paths.update(self.paths)
-        rem = [x for x in paths if __version__ not in x]
-        for x in rem: paths.pop(x)
-        raw = ['%r : %r,'%(k, v) for k, v in paths.items()]
-        raw = '{\n\n%s\n\n}\n'%('\n'.join(raw))
-        t = NamedTemporaryFile()
-        t.write(raw)
-        t.flush()
-        check_call(['scp', t.name, 'divok:'+self.GPATHS])
-        self.br = self.login_to_gmail()
-        self.delete_old_files()
-        #if len(self.get_files_hosted_by_google_code()) > len(installers()):
-        #    self.warn('Some old files were not deleted from Google Code')
-
-    def login_to_gmail(self):
-        import mechanize
-        self.info('Logging into Gmail')
-        raw = open(self.OFFLINEIMAP).read()
-        pw = re.search(r'(?s)remoteuser = .*@gmail.com.*?remotepass = (\S+)',
-                raw).group(1).strip()
-        br = mechanize.Browser()
-        br.set_handle_robots(False)
-        br.open('http://gmail.com')
-        br.select_form(nr=0)
-        br.form['Email'] = self.USERNAME
-        br.form['Passwd'] = pw
-        br.submit()
-        return br
-
-    def get_files_hosted_by_google_code(self):
-        import urllib2
-        from lxml import html
-        self.info('Getting existing files in google code')
-        raw = urllib2.urlopen(self.FILES_LIST).read()
-        root = html.fromstring(raw)
-        ans = {}
-        for a in root.xpath('//td[@class="vt id col_0"]/a[@href]'):
-            ans[a.text.strip()] = a.get('href')
-        return ans
-
-    def delete_old_files(self):
-        self.info('Deleting old files from Google Code...')
-        for fname in self.old_files:
-            ext = fname.rpartition('.')[-1]
-            if ext in ('flv', 'mp4', 'ogg', 'avi'):
-                continue
-            self.info('\tDeleting', fname)
-            self.br.open(self.DELETE_URL%fname)
-            self.br.select_form(predicate=lambda x: 'delete.do' in x.action)
-            self.br.form.find_control(name='delete')
-            self.br.submit(name='delete')
-
-    def encode_upload_request(self, fields, file_path):
-        BOUNDARY = '----------Googlecode_boundary_reindeer_flotilla'
-        CRLF = '\r\n'
-
-        body = []
-
-        # Add the metadata about the upload first
-        for key, value in fields:
-            body.extend(
-                ['--' + BOUNDARY,
-                 'Content-Disposition: form-data; name="%s"' % key,
-                 '',
-                 value,
-                 ])
-
-        # Now add the file itself
-        file_name = os.path.basename(file_path)
-        with open(file_path, 'rb') as f:
-            file_content = f.read()
-
-        body.extend(
-            ['--' + BOUNDARY,
-             'Content-Disposition: form-data; name="filename"; filename="%s"'
-             % file_name,
-             # The upload server determines the mime-type, no need to set it.
-             'Content-Type: application/octet-stream',
-             '',
-             file_content,
-             ])
-
-        # Finalize the form body
-        body.extend(['--' + BOUNDARY + '--', ''])
-
-        return 'multipart/form-data; boundary=%s' % BOUNDARY, CRLF.join(body)
-
-    def upload(self, fname, desc, labels=[], retry=0):
-        form_fields = [('summary', desc)]
-        form_fields.extend([('label', l.strip()) for l in labels])
-
-        content_type, body = self.encode_upload_request(form_fields, fname)
-        upload_uri = '/files'
-        auth_token = base64.b64encode('%s:%s'% (self.USERNAME, self.password))
-        headers = {
-            'Authorization': 'Basic %s' % auth_token,
-            'User-Agent': 'Calibre googlecode.com uploader v0.1.0',
-            'Content-Type': content_type,
-            }
-
-        with NamedTemporaryFile(delete=False) as f:
-            f.write(body)
-
-        try:
-            body = ReadFileWithProgressReporting(f.name)
-            server = httplib.HTTPSConnection(self.UPLOAD_HOST)
-            server.request('POST', upload_uri, body, headers)
-            resp = server.getresponse()
-            server.close()
-        finally:
-            os.remove(f.name)
-
-        if resp.status == 201:
-            return resp.getheader('Location')
-
-        print 'Failed to upload with code %d and reason: %s'%(resp.status,
-                resp.reason)
-        if retry < 1:
-            print 'Retrying in 5 seconds....'
-            time.sleep(5)
-            return self.upload(fname, desc, labels=labels, retry=retry+1)
-        raise Exception('Failed to upload '+fname)
-
-# }}}
-
-class UploadToSourceForge(Command): # {{{
-
-    description = 'Upload release files to sourceforge'
-
-    USERNAME = 'kovidgoyal'
-    PROJECT = 'calibre'
-    BASE = '/home/frs/project/c/ca/'+PROJECT
-
-    @property
-    def rdir(self):
-        return self.BASE+'/'+__version__
-
-    def upload_installers(self):
-        for x in installers():
-            if not os.path.exists(x): continue
-            start = time.time()
-            self.info('Uploading', x)
-            for i in range(5):
-                try:
-                    check_call(['rsync', '-z', '--progress', '-e', 'ssh -x', x,
-                        '%s,%s@frs.sourceforge.net:%s'%(self.USERNAME, self.PROJECT,
-                            self.rdir+'/')])
-                except KeyboardInterrupt:
-                    raise SystemExit(1)
-                except:
-                    print ('\nUpload failed, trying again in 30 seconds')
-                    time.sleep(30)
-                else:
-                    break
-            print 'Uploaded in', int(time.time() - start), 'seconds'
-            print ('\n')
-
-    def run(self, opts):
-        self.opts = opts
-        self.upload_installers()
+def run_remote_upload(args):
+    print 'Running remotely:', ' '.join(args)
+    subprocess.check_call(['ssh', '-x', '%s@%s'%(STAGING_USER, STAGING_HOST),
+        'cd', STAGING_DIR, '&&', 'python', 'hosting.py']+args)

 # }}}

 class UploadInstallers(Command): # {{{

-    description = 'Upload any installers present in dist/ to mobileread'
-
-    def curl_list_dir(self, url=MOBILEREAD, listonly=1):
-        import pycurl
-        c = pycurl.Curl()
-        c.setopt(pycurl.URL, url)
-        c.setopt(c.FTP_USE_EPSV, 1)
-        c.setopt(c.NETRC, c.NETRC_REQUIRED)
-        c.setopt(c.FTPLISTONLY, listonly)
-        c.setopt(c.FTP_CREATE_MISSING_DIRS, 1)
-        b = cStringIO.StringIO()
-        c.setopt(c.WRITEFUNCTION, b.write)
-        c.perform()
-        c.close()
-        return b.getvalue().split() if listonly else b.getvalue().splitlines()
-
-    def curl_delete_file(self, path, url=MOBILEREAD):
-        import pycurl
-        c = pycurl.Curl()
-        c.setopt(pycurl.URL, url)
-        c.setopt(c.FTP_USE_EPSV, 1)
-        c.setopt(c.NETRC, c.NETRC_REQUIRED)
-        self.info('Deleting file %s on %s'%(path, url))
-        c.setopt(c.QUOTE, ['dele '+ path])
-        c.perform()
-        c.close()
-
-    def curl_upload_file(self, stream, url):
-        import pycurl
-        c = pycurl.Curl()
-        c.setopt(pycurl.URL, url)
-        c.setopt(pycurl.UPLOAD, 1)
-        c.setopt(c.NETRC, c.NETRC_REQUIRED)
-        c.setopt(pycurl.READFUNCTION, stream.read)
-        stream.seek(0, 2)
-        c.setopt(pycurl.INFILESIZE_LARGE, stream.tell())
-        stream.seek(0)
-        c.setopt(c.NOPROGRESS, 0)
-        c.setopt(c.FTP_CREATE_MISSING_DIRS, 1)
-        self.info('Uploading file %s to url %s' % (getattr(stream, 'name', ''),
-            url))
-        try:
-            c.perform()
-            c.close()
-        except:
-            pass
-        files = self.curl_list_dir(listonly=0)
-        for line in files:
-            line = line.split()
-            if url.endswith(line[-1]):
-                size = long(line[4])
-                stream.seek(0,2)
-                if size != stream.tell():
-                    raise RuntimeError('curl failed to upload %s correctly'%getattr(stream, 'name', ''))
-
-    def upload_installer(self, name):
-        if not os.path.exists(name):
-            return
-        bname = os.path.basename(name)
-        pat = re.compile(bname.replace(__version__, r'\d+\.\d+\.\d+'))
-        for f in self.curl_list_dir():
-            if pat.search(f):
-                self.curl_delete_file('/calibre/'+f)
-        self.curl_upload_file(open(name, 'rb'), MOBILEREAD+os.path.basename(name))
+    def add_options(self, parser):
+        parser.add_option('--replace', default=False, action='store_true',
+            help='Replace existing installers, when uploading to google')

     def run(self, opts):
-        self.info('Uploading installers...')
-        installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2')))
-        installers.append(installer_name('tar.bz2', is64bit=True))
-        map(self.upload_installer, installers)
+        all_possible = set(installers())
+        available = set(glob.glob('dist/*'))
+        files = {x:installer_description(x) for x in
+                all_possible.intersection(available)}
+        tdir = mkdtemp()
+        try:
+            self.upload_to_staging(tdir, files)
+            self.upload_to_sourceforge()
+            self.upload_to_google(opts.replace)
+        finally:
+            shutil.rmtree(tdir, ignore_errors=True)
+
+    def upload_to_staging(self, tdir, files):
+        os.mkdir(tdir+'/dist')
+        hosting = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                'hosting.py')
+        shutil.copyfile(hosting, os.path.join(tdir, 'hosting.py'))
+
+        for f in files:
+            shutil.copyfile(f, os.path.join(tdir, f))
+
+        with open(os.path.join(tdir, 'fmap'), 'wb') as fo:
+            for f, desc in files.iteritems():
+                fo.write('%s: %s\n'%(f, desc))
+
+        send_data(tdir)
+
+    def upload_to_google(self, replace):
+        gdata = get_google_data()
+        args = gc_cmdline(__version__, gdata)
+        if replace:
+            args = ['--replace'] + args
+        run_remote_upload(args)
+
+    def upload_to_sourceforge(self):
+        sdata = get_sourceforge_data()
+        args = sf_cmdline(__version__, sdata)
+        run_remote_upload(args)
 # }}}

 class UploadUserManual(Command): # {{{
@@ -508,4 +232,61 @@ class UploadToServer(Command): # {{{
         shutil.rmtree(tdir)
 # }}}

+# Testing {{{
+
+def write_files(fmap):
+    for f in fmap:
+        with open(f, 'wb') as f:
+            f.write(os.urandom(100))
+            f.write(b'a'*1000000)
+    with open('fmap', 'wb') as fo:
+        for f, desc in fmap.iteritems():
+            fo.write('%s: %s\n'%(f, desc))
+
+def setup_installers():
+    ver = '0.0.1'
+    files = {x.replace(__version__, ver):installer_description(x) for x in installers()}
+    tdir = mkdtemp()
+    os.chdir(tdir)
+    return tdir, files, ver
+
+def test_google_uploader():
+    gdata = get_google_data()
+    gdata['project'] = 'calibre-hosting-uploader'
+    gdata['path_map_location'] += '-test'
+    hosting = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+            'hosting.py')
+
+    tdir, files, ver = setup_installers()
+    try:
+        os.mkdir('dist')
+        write_files(files)
+        shutil.copyfile(hosting, 'hosting.py')
+        send_data(tdir)
+        args = gc_cmdline(ver, gdata)
+
+        print ('Doing initial upload')
+        run_remote_upload(args)
+        raw_input('Press Enter to proceed:')
+
+        print ('\nDoing re-upload')
+        run_remote_upload(['--replace']+args)
+        raw_input('Press Enter to proceed:')
+
+        nv = ver + '.1'
+        files = {x.replace(__version__, nv):installer_description(x) for x in installers()}
+        write_files(files)
+        send_data(tdir)
+        args[1] = nv
+        print ('\nDoing update upload')
+        run_remote_upload(args)
+        print ('\nDont forget to delete any remaining files in the %s project'%
+                gdata['project'])
+
+    finally:
+        shutil.rmtree(tdir)
+# }}}
+
+if __name__ == '__main__':
+    test_google_uploader()

From 972b551ae1eee2c66f7f649456427d3b25d47c91 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 3 Jan 2012 15:51:18 +0530
Subject: [PATCH 73/74] ...

---
 recipes/tagesspiegel.recipe | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/recipes/tagesspiegel.recipe b/recipes/tagesspiegel.recipe
index 3728f8306c..5af065ce53 100644
--- a/recipes/tagesspiegel.recipe
+++ b/recipes/tagesspiegel.recipe
@@ -33,7 +33,12 @@ class TagesspiegelRSS(BasicNewsRecipe):
     no_javascript = True
     remove_empty_feeds = True
     encoding = 'utf-8'
-    auto_cleanup = True
+    remove_tags = [{'class':'hcf-header'}]
+
+    def print_version(self, url):
+        url = url.split('/')
+        url[-1] = 'v_print,%s?p='%url[-1]
+        return '/'.join(url)

     def parse_index(self):
         soup = self.index_to_soup('http://www.tagesspiegel.de/zeitung/')

From 8cda640dd1df5086760632299e5a4fa647f4465c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 3 Jan 2012 16:11:57 +0530
Subject: [PATCH 74/74] Switch User Manual search engine to new adsense account

---
 src/calibre/manual/templates/search.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/manual/templates/search.html b/src/calibre/manual/templates/search.html
index 58901c8fb4..014ee11a65 100644
--- a/src/calibre/manual/templates/search.html
+++ b/src/calibre/manual/templates/search.html
@@ -33,7 +33,7 @@
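
Note on the staging scheme in the upload.py patch above: both upload_to_staging() and the write_files() test helper emit an 'fmap' file with one 'filename: description' pair per line, which the remote hosting.py script (copied to the staging server alongside the installers, but not included in this patch series) is expected to read back. A minimal sketch of a reader for that format, assuming hosting.py parses it with a plain split and that installer filenames never contain ': ' (parse_fmap is a hypothetical name, not taken from hosting.py):

    def parse_fmap(path='fmap'):
        # Read back the 'filename: description' lines written by
        # upload_to_staging()/write_files(). The real parser lives in
        # hosting.py, which is not shown here, so treat this as an
        # illustrative assumption rather than the actual implementation.
        fmap = {}
        with open(path, 'rb') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                fname, _, desc = line.partition(': ')
                fmap[fname] = desc
        return fmap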
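
Note on patch 73: the print_version() hook added to the Tagesspiegel recipe rewrites only the last path segment of an article URL into the paper's print view. Traced on a hypothetical article URL (the exact URL shape is an assumption, not something the recipe guarantees):

    # 'http://www.tagesspiegel.de/weltspiegel/ein-artikel/1234.html'
    # splits into [..., 'ein-artikel', '1234.html']; the last segment
    # becomes 'v_print,1234.html?p=', giving
    # 'http://www.tagesspiegel.de/weltspiegel/ein-artikel/v_print,1234.html?p='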