Sync to trunk.

This commit is contained in:
John Schember 2010-02-20 17:15:35 -05:00
commit 07dab2d5ae
198 changed files with 40427 additions and 30585 deletions


@ -4,6 +4,223 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.6.42
date: 2010-02-20
bug fixes:
- title: "Fix regression that broke catalog generation from the Graphical User Interface in 0.6.41"
- title: "Fix right edge of comics like Dilbert and xkcd getting cut off on the SONY reader. More generally, take page margins into account when rescaling images to fit in the selected output profile."
- version: 0.6.41
date: 2010-02-19
new features:
- title: "Make calibre timezone aware. This required lots of internal changes, so I may have broken something"
type: major
- title: "Allow editing of metadata in DRMed MOBI files"
type: major
- title: "ebook-convert: Allow passing URLs as argument to --cover"
tickets: [4909]
- title: "OS X/linux driver for EB511"
- title: "ebook-meta: Allow changing of published date"
- title: "Make replacing of files in ZIP archives faster and (hopefully) more robust"
- title: "Speed optimization for viewing large EPUB files"
- title: "Speed up parsing of OPF files"
tickets: [4908]
bug fixes:
- title: "Fix drag and drop of multiple books to OS X dock icon"
tickets: [4849]
- title: "MOBI Output: Encode titles as UTF-8 in the PalmDoc header as well as the EXTH header, since there are apparently MOBI readers that use the title from the PalmDoc header in preference to the title from the EXTH header."
- title: "MOBI Output: Remove soft hyphens as the Kindle doesn't support them."
tickets: [4887]
- title: "Fix Boox main mem and SD card swapped on windows"
- title: "Fix sending large ebook files to devices"
tickets: [4896]
- title: "EPUB Output: Strip invalid anchors from NCX TOC as Adobe Digital Editions cries when it sees one"
tickets: [4907]
- title: "EPUB metadata: Don't set title_sort as a file_as attribute, as the brain-dead OPF spec doesn't allow this"
- title: "Make publishing the content server via mDNS a little more robust"
- title: "Content server: Use new exact matching for greater precision when generating OPDS catalogs. Also fix regression that broke browsing by Tags on Stanza."
- title: "Proper fix for breakage in LRF viewer caused by API change in QGraphicsItem in Qt 4.6"
new recipes:
- title: Various Polish news sources
author: Tomasz Dlugosz
- title: Que Leer, Wired UK
author: Darko Miletic
- title: Kathimerini and Ta Nea
author: Pan
- title: Winter Olympics
author: Starson17
improved recipes:
- Wired Magazine
- version: 0.6.40
date: 2010-02-12
new features:
- title: "Ability to perform exact match and regular expression based searches."
type: major
tickets: [4830]
description: >
"You can now perform exact match searches by prefixing your search term with an =.
So for example, tag:=fiction will match all tags named fiction, but not tags named
non-fiction. Similarly, you can use regular expression based searches by prefixing
the search term by ~."
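The matching rules described above can be sketched in a few lines (a hypothetical simplification for illustration only, not calibre's actual search code):

```python
import re

def match_tag(query, tags):
    # '=' prefix: exact match; '~' prefix: regular expression;
    # no prefix: plain substring match (the pre-0.6.40 behavior)
    if query.startswith('='):
        needle = query[1:].lower()
        return [t for t in tags if t.lower() == needle]
    if query.startswith('~'):
        pat = re.compile(query[1:], re.IGNORECASE)
        return [t for t in tags if pat.search(t)]
    q = query.lower()
    return [t for t in tags if q in t.lower()]

tags = ['fiction', 'non-fiction', 'science fiction']
print(match_tag('=fiction', tags))  # only the exact tag
print(match_tag('fiction', tags))   # all three, by substring
```

With this distinction, `tag:=fiction` no longer picks up `non-fiction`, while the old substring behavior remains the default.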
- title: "Autodetect if a zip/rar file is actually a comic and if so, import it as CBZ/CBR"
tickets: [4753]
- title: "Add plugin to automatically extract an ebook during import if it is in a zip/rar archive"
- title: "Linux source install: Install a calibre environment module to ease the integration of calibre into other python projects"
bug fixes:
- title: "Fix regression in 0.6.39 that broke the LRF viewer"
- title: "ZIP/EPUB files: Try to detect file name encoding instead of assuming the name is encoded in UTF-8. Also correctly
encode the extracted file name in the local filesystem encoding."
- title: "HTML Input: Handle HTML fragments more gracefully"
tickets: [4854]
- title: "Zip files: Workaround invalid zip files that contain end-of-file comments but set comment size to zero"
- title: "Restore the recipe for the Wired daily feed."
tickets: [4871]
- title: "MOBI metadata: Preserve original EXTH records when not overwritten by calibre metadata."
- title: "Catalog generation: Improved series sorting. All books not in a series are now grouped together"
- title: "Fix occasional threading related crash when using the ChooseFormatDialog"
- title: "Catalog generation: Various fixes for handling invalid data"
new recipes:
- title: Sueddeutsche Zeitung
author: Darko Miletic
improved recipes:
- Pagina 12
- Variety
- Toronto Sun
- Telegraph UK
- Danas
- Dilbert
- version: 0.6.39
date: 2010-02-09
new features:
- title: "Add ability to control how author sort strings are automatically generated from author strings, via the config file tweaks.py"
- title: "Handle broken EPUB files from Project Gutenberg that have invalid OCF containers"
tickets: [4832]
bug fixes:
- title: "Fix regression in 0.6.38 that broke setting bookmarks in the viewer"
- title: "HTML Input: Ignore filenames that are encoded incorrectly."
new recipes:
- title: Radikal
author: Darko Miletic
- version: 0.6.38
date: 2010-02-09
new features:
- title: "Driver for the Irex DR 800"
- title: "Driver for the Booq e-book reader"
- title: "Allow automatic series increment algorithm to be tweaked by editing the config file tweaks.py"
- title: "Various improvements to the catalog generation. Larger thumbnails in EPUB output and better series sorting. Better handling of html markup in the comments."
- title: "MOBI Output: Make font used for generated masthead images user customizable."
bug fixes:
- title: "E-book viewer: Make bookmarking (and remembering the last open position) more robust. For linux source installs, you must have Qt 4.6"
tickets: [4812]
- title: "Fix conversion/import of HTML files with very long href links on windows"
tickets: [4783]
- title: "Don't read metadata from filenames for download news, even if the user has the read metadata from filename option set"
tickets: [4758]
- title: "Don't allow leading or trailing space in tags and series. Also normalize all internal spaces to a single space"
tickets: [4809]
- title: "E-book viewer: Toolbars remember their position"
tickets: [4811]
- title: "Fix year being repeated when editing date in main library screen on windows"
tickets: [4829]
- title: "New download: Fix downloading of images from URLs with an ampersand in them"
- title: "Linux source install: unbundle cssutils, it is now an external dependency"
- title: "MOBI metadata: Fix regression that broke setting of titles in some MOBI files"
- title: "EPUB metadata: Extract the cover image from the html it is embedded in if possible, instead of rendering the html. Removes the white margins on covers and speeds up cover extraction"
- title: "Fix regression in PDB output"
- title: "News download: Remove <base> tags automatically"
- title: "Searching on device: Ignore unicode errors"
new recipes:
- title: Courier Press
author: Krittika Goyal
- title: zive.sk and iliterature.cz
author: Abelturd
- title: El Comercio, Digital Spy UK, Gizmodo, News Straits Times, Read It Later, TidBits
author: Darko Miletic
improved recipes:
- Jerusalem Post
- Clarin
- La Nacion
- Harvard Business Review
- People US Mashup
- The New Republic
- "Pagina 12"
- Discover Magazine
- Metro Montreal
- version: 0.6.37
date: 2010-02-01


@ -79,3 +79,9 @@ p.unread_book {
text-indent:-2em;
}
hr.series_divider {
width:50%;
margin-left:1em;
margin-top:0em;
margin-bottom:0em;
}


@ -0,0 +1,27 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Contains various tweaks that affect calibre behavior. Only edit this file if
you know what you are doing. If you delete this file, it will be recreated from
defaults.
'''
# The algorithm used to assign a new book in an existing series a series number.
# Possible values are:
# next - Next available number
# const - Assign the number 1 always
series_index_auto_increment = 'next'
# The algorithm used to copy author to author_sort
# Possible values are:
# invert: use "fn ln" -> "ln, fn" (the original algorithm)
# copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
author_sort_copy_method = 'invert'
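The three author_sort strategies documented in the comments above can be sketched as follows (an illustrative simplification; calibre's real implementation lives elsewhere and handles more edge cases):

```python
def author_to_author_sort(author, method='invert'):
    # 'copy': leave the author string untouched
    if method == 'copy':
        return author
    # 'comma': behave like 'copy' when a comma is already present,
    # otherwise fall through to 'invert'
    if method == 'comma' and ',' in author:
        return author
    # 'invert': "fn ln" -> "ln, fn"; single-word names pass through
    parts = author.split()
    if len(parts) < 2:
        return author
    return parts[-1] + ', ' + ' '.join(parts[:-1])

print(author_to_author_sort('John Schember'))            # 'Schember, John'
print(author_to_author_sort('Miletic, Darko', 'comma'))  # unchanged
```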


@ -0,0 +1,157 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
<filter id="filter5365">
<feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
</filter>
<g id="layer1">
</g>
<g id="layer2">
<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005
171.429,297.005 "/>
<g id="path5265" filter="url(#filter5365)">
<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09
119.953,80.636 70.397,97.084 "/>
</g>
<g id="path5267" filter="url(#filter5365)">
<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
l2.322,16.553L118.639,100.902z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
</g>
<g id="path5269" filter="url(#filter5365)">
<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
C68.936,101.726,70.711,98.81,70.711,98.81z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
</g>
<g id="path5271" filter="url(#filter5365)">
<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
C17.974,94.288,17.113,87.874,21.479,79.607z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
</g>
<g id="path5273" filter="url(#filter5365)">
<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
L120.871,99.092z"/>
</g>
<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#B5FFA6"/>
<stop offset="1" style="stop-color:#76E976"/>
</radialGradient>
<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
l0.356,14.644L118.22,97.921z"/>
<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
C66.471,100.649,68.068,97.629,68.068,97.629z"/>
<g id="path5419" filter="url(#filter5365)">
<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233
106.738,52.778 57.183,69.227 "/>
</g>
<g id="path5421" filter="url(#filter5365)">
<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
l2.322,16.552L105.424,73.045z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
</g>
<g id="path5423" filter="url(#filter5365)">
<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
C55.721,73.869,57.497,70.953,57.497,70.953z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
</g>
<g id="path5425" filter="url(#filter5365)">
<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
C4.759,66.431,3.899,60.017,8.265,51.751z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
</g>
<g id="path5427" filter="url(#filter5365)">
<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
L107.656,71.234z"/>
</g>
<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
l49.548,18.171L54.54,67.985L6.102,50.193z"/>
<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#789DED"/>
<stop offset="1" style="stop-color:#2381E8"/>
</radialGradient>
<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
z"/>
<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
C53.256,72.793,54.854,69.772,54.854,69.772z"/>
<g id="path5447" filter="url(#filter5365)">
<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305
123.882,28.85 74.326,45.299 "/>
</g>
<g id="path5449" filter="url(#filter5365)">
<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
l2.321,16.552L122.567,49.116z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
</g>
<g id="path5451" filter="url(#filter5365)">
<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
C72.863,49.94,74.641,47.024,74.641,47.024z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
</g>
<g id="path5453" filter="url(#filter5365)">
<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
C21.902,42.502,21.042,36.088,25.408,27.822z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
</g>
<g id="path5455" filter="url(#filter5365)">
<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
</g>
<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#FD8A8A"/>
<stop offset="1" style="stop-color:#FF7878"/>
</radialGradient>
<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
<stop offset="0" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
</linearGradient>
<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
L122.148,46.135z"/>
<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
C70.399,48.864,71.997,45.844,71.997,45.844z"/>
</g>
</svg>


Binary file not shown.


Binary file not shown.


Binary file not shown.


Binary file not shown.



@ -0,0 +1,37 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class Kathimerini(BasicNewsRecipe):
    title = 'Kathimerini'
    __author__ = 'Pan'
    description = 'News from Greece'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'el'
    no_stylesheets = True
    remove_tags_before = dict(name='td',attrs={'class':'news'})
    remove_tags_after = dict(name='td',attrs={'class':'news'})
    remove_attributes = ['width', 'src','header','footer']

    feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',
              'http://wk.kathimerini.gr/xml_files/politics.xml'),
             (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
              ' http://wk.kathimerini.gr/xml_files/ell.xml'),
             (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
              ' http://wk.kathimerini.gr/xml_files/world.xml'),
             (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
              'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
             (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',
              'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
             (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
              'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
             (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
              'http://wk.kathimerini.gr/xml_files/civ.xml'),
             (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',
              'http://wk.kathimerini.gr/xml_files/st.xml')]

    def print_version(self, url):
        return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/')
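The print_version hook maps each article URL to its printer-friendly counterpart before download; as a standalone sketch of that substitution (the article path used below is hypothetical):

```python
def print_version(url):
    # Same substitution as the recipe's print_version method:
    # insert a second '4dcgi/' segment to reach the print view
    return url.replace('http://news.kathimerini.gr/4dcgi/',
                       'http://news.kathimerini.gr/4dcgi/4dcgi/')

# Hypothetical article path, for illustration only
print(print_version('http://news.kathimerini.gr/4dcgi/story'))
```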


@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
clarin.com
'''
@ -21,7 +21,8 @@ class Clarin(BasicNewsRecipe):
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
encoding = 'cp1252'
language = 'es'
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
masthead_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} '
conversion_options = {
'comment' : description


@ -0,0 +1,26 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class CourierPress(BasicNewsRecipe):
    title = u'Courier Press'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    remove_stylesheets = True
    remove_tags = [
        dict(name='iframe'),
    ]

    feeds = [
        ('Courier Press',
         'http://www.courierpress.com/rss/headlines/news/'),
    ]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'article_body'})
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup


@ -1,64 +1,63 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Danas(BasicNewsRecipe):
title = 'Danas'
__author__ = 'Darko Miletic'
description = 'Vesti'
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
publisher = 'Danas d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = False
use_embedded_content = False
encoding = 'utf-8'
masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
language = 'sr'
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
,dict(name='div', attrs={'id':'comments'})
,dict(name=['object','link'])
,dict(name=['object','link','iframe'])
]
feeds = [
(u'Vesti' , u'http://www.danas.rs/rss/rss.asp' )
,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4')
feeds = [
(u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24')
,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' )
,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25')
,(u'Srbija' , u'http://www.danas.rs/rss/rss.asp?column_id=28')
,(u'Kultura' , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
,(u'Sport' , u'http://www.danas.rs/rss/rss.asp?column_id=13')
,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42')
,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19')
,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
]
def preprocess_html(self, soup):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
return url + '&action=print'


@ -0,0 +1,60 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.5'
'''
di.com.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class DziennikInternautowRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'

    title = u'Dziennik Internautow'
    publisher = u'Dziennik Internaut\xc3\xb3w Sp. z o.o.'
    description = u'Internet w \xc5\xbcyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\xc5\x84stwo w Sieci, technologia.'
    max_articles_per_feed = 100
    oldest_article = 7
    cover_url = 'http://di.com.pl/pic/logo_di_norm.gif'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    extra_css = '''
        .fotodesc{font-size: 75%;}
        .pub_data{font-size: 75%;}
        .fotonews{clear: both; padding-top: 10px; padding-bottom: 10px;}
        #pub_foto{font-size: 75%; float: left; padding-right: 10px;}
    '''

    feeds = [
        (u'Dziennik Internautów', u'http://feeds.feedburner.com/glowny-di')
    ]

    keep_only_tags = [
        dict(name = 'div', attrs = {'id' : 'pub_head'}),
        dict(name = 'div', attrs = {'id' : 'pub_content'})
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
        dict(name = 'div', attrs = {'class' : 'uniBox'}),
        dict(name = 'object', attrs = {}),
        dict(name = 'h3', attrs = {})
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r', <a href="http://di.com.pl/komentarze,.*?</div>', lambda match: '</div>'),
            (r'<div class="fotonews".*?">', lambda match: '<div class="fotonews">'),
            (r'http://di.com.pl/pic/photo/mini/', lambda match: 'http://di.com.pl/pic/photo/oryginal/'),
            (r'\s*</', lambda match: '</'),
        ]
    ]
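A preprocess_regexps list like the one above pairs compiled patterns with replacement callables; the recipe framework applies each pair to the downloaded HTML roughly like this (a simplified sketch, not the actual BasicNewsRecipe internals):

```python
import re

# Build the rules the same way the recipe does: compile each pattern
# with IGNORECASE | DOTALL and keep its replacement function
preprocess_regexps = [
    (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
    [
        # strip whitespace before closing tags
        (r'\s*</', lambda match: '</'),
    ]
]

def apply_regexps(html, rules):
    # Run every (pattern, function) pair over the page source in order
    for pat, func in rules:
        html = pat.sub(func, html)
    return html

print(apply_regexps('<p>text  </p>', preprocess_regexps))  # '<p>text</p>'
```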


@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.dilbert.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe):
feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
preprocess_regexps = [
(re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
lambda match: 'strip.zoom.gif')
]
def get_article_url(self, article):
return article.get('feedburner_origlink', None)


@ -0,0 +1,49 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
blog.eclicto.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class BlogeClictoRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'

    title = u'Blog eClicto'
    publisher = u'Blog eClicto'
    description = u'Blog o e-papierze i e-bookach'
    max_articles_per_feed = 100
    cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    extra_css = '''
        img{float: left; padding-right: 10px; padding-bottom: 5px;}
    '''

    feeds = [
        (u'Blog eClicto', u'http://blog.eclicto.pl/feed/')
    ]

    remove_tags = [
        dict(name = 'span', attrs = {'id' : 'tags'})
    ]

    remove_tags_after = [
        dict(name = 'div', attrs = {'class' : 'post'})
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'\s*</', lambda match: '</'),
        ]
    ]


@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
eksiazki.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class eksiazki(BasicNewsRecipe):
    title = u'eKsiazki.org'
    description = u'Twoje centrum wiedzy o ePapierze i eBookach'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True

    feeds = [(u'eKsiazki.org', u'http://www.eksiazki.org/feed/')]

    keep_only_tags = [dict(name='div', attrs={'id':'content-body'})]
    remove_tags = [
        dict(name='span', attrs={'class':'nr_comm'}),
        dict(name='div', attrs={'id':'tabsContainer'}),
        dict(name='div', attrs={'class':'next_previous_links'})]


@ -1,17 +1,41 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe):
title = u'The Houston Chronicle'
description = 'News from Houston, Texas'
__author__ = 'Kovid Goyal'
__author__ = 'Kovid Goyal and Sujata Raman'
language = 'en'
timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True
keep_only_tags = [dict(id=['story-head', 'story'])]
remove_tags = [dict(id=['share-module', 'resource-box',
'resource-box-header'])]
keep_only_tags = [
dict(id=['story-head', 'story'])
]
remove_tags = [
dict(id=['share-module', 'resource-box',
'resource-box-header'])
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
#story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
#story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
#story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
#Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
.p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
.p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
'''
def parse_index(self):
soup = self.index_to_soup('http://www.chron.com/news/')
@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
feeds.append((current_section, current_articles))
return feeds


@ -0,0 +1,38 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fakty.interia.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class InteriaFakty(BasicNewsRecipe):
title = u'Interia.pl - Fakty'
language = 'pl'
oldest_article = 7
__author__ = u'Tomasz D\u0142ugosz'
simultaneous_downloads = 2
no_stylesheets = True
remove_javascript = True
max_articles_per_feed = 100
feeds = [(u'Kraj', u'http://kanaly.rss.interia.pl/kraj.xml'),
(u'\u015awiat', u'http://kanaly.rss.interia.pl/swiat.xml'),
(u'Wiadomo\u015bci dnia', u'http://kanaly.rss.interia.pl/fakty.xml'),
(u'Przegl\u0105d prasy', u'http://kanaly.rss.interia.pl/przeglad_prasy.xml'),
(u'Wywiady', u'http://kanaly.rss.interia.pl/wywiady.xml'),
(u'Ciekawostki', u'http://kanaly.rss.interia.pl/ciekawostki.xml')]
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
remove_tags = [
dict(name='div', attrs={'class':'box fontSizeSwitch'}),
dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'embed embedLeft articleEmbedArticleList articleEmbedArticleListTitle'}),
dict(name='span', attrs={'class':'keywords'})]
extra_css = '''
h2 { font-size: 1.2em; }
'''


@ -0,0 +1,71 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
sport.interia.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class InteriaSport(BasicNewsRecipe):
title = u'Interia.pl - Sport'
language = 'pl'
oldest_article = 7
__author__ = u'Tomasz D\u0142ugosz'
simultaneous_downloads = 3
no_stylesheets = True
remove_javascript = True
max_articles_per_feed = 100
feeds = [(u'Wydarzenia sportowe', u'http://kanaly.rss.interia.pl/sport.xml'),
(u'Pi\u0142ka no\u017cna', u'http://kanaly.rss.interia.pl/pilka_nozna.xml'),
(u'Siatk\xf3wka', u'http://kanaly.rss.interia.pl/siatkowka.xml'),
(u'Koszyk\xf3wka', u'http://kanaly.rss.interia.pl/koszykowka.xml'),
(u'NBA', u'http://kanaly.rss.interia.pl/nba.xml'),
(u'Kolarstwo', u'http://kanaly.rss.interia.pl/kolarstwo.xml'),
(u'\u017bu\u017cel', u'http://kanaly.rss.interia.pl/zuzel.xml'),
(u'Tenis', u'http://kanaly.rss.interia.pl/tenis.xml')]
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
remove_tags = [dict(name='div', attrs={'class':'object gallery'})]
extra_css = '''
.articleDate {
font-size: 0.5em;
color: black;
}
.articleFoto {
display: block;
font-family: sans;
font-size: 0.5em;
text-indent: 0;
color: black;
}
.articleText {
display: block;
margin-bottom: 1em;
margin-left: 0;
margin-right: 0;
margin-top: 1em;
color: black;
}
.articleLead {
font-size: 1.2em;
}
'''
preprocess_regexps = [
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<p><a href.*?</a></p>', lambda match: ''),
# FIXME
#(r'(<div id="newsAddContent">)(.*?)(<a href=".*">)(.*?)(</a>)', lambda match: '\1\2\4'),
(r'<p>(<i>)?<b>(ZOBACZ|CZYTAJ) T.*?</div>', lambda match: '</div>')
]
]


@ -10,22 +10,19 @@ class JerusalemPost(BasicNewsRecipe):
__author__ = 'Kovid Goyal'
max_articles_per_feed = 10
no_stylesheets = True
remove_tags_before = {'class':'byline'}
remove_tags = [
{'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox',
'slideshow']},
dict(id=['artFontButtons', 'artRelatedBlock']),
]
remove_tags_after = {'id':'artTxtBlock'}
remove_tags_before = {'class':'jp-grid-content'}
remove_tags_after = {'id':'body_val'}
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'),
('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'),
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
]
def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div'
return soup
def preprocess_html(self, soup):
for x in soup.findAll(name=['form', 'input']):
x.name = 'div'
for x in soup.findAll('body', style=True):
del x['style']
return soup


@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lanacion.com.ar
'''
@ -12,28 +10,34 @@ class Lanacion(BasicNewsRecipe):
title = 'La Nacion'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo'
publisher = 'La Nacion'
publisher = 'La Nacion S.A.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
language = 'es'
encoding = 'cp1252'
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
extra_css = ' h1{font-family: Georgia,serif} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em} .notaEpigrafe{font-size: x-small} '
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
,dict(name='ul' , attrs={'class':'cajaHerramientas cajaTop noprint'})
,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
,dict(attrs={'class':['titulosMultimedia','derecha','techo color']})
,dict(name=['iframe','embed','object'])
]
remove_attributes = ['height','width']
feeds = [
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
@ -51,10 +55,4 @@ class Lanacion(BasicNewsRecipe):
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = 'es'
return self.adeify_images(soup)


@ -0,0 +1,43 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
olgierd.bblog.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LegeArtisRecipe(BasicNewsRecipe):
__author__ = 'Mori'
language = 'pl'
title = u'Lege Artis'
publisher = u'Olgierd Rudak'
description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
extra_css = '''
img{clear: both;}
'''
feeds = [
(u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
]
keep_only_tags = [
dict(name = 'div', attrs = {'class' : 'post_title'}),
dict(name = 'div', attrs = {'class' : 'post_date'}),
dict(name = 'div', attrs = {'class' : 'post_content'})
]
remove_tags = [
dict(name = 'div', attrs = {'id' : 'bb_tools'}),
dict(name = 'div', attrs = {'class' : 'post_comments'}),
dict(name = 'object', attrs = {})
]

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
legitymizm.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Legitymizm(BasicNewsRecipe):
title = u'Organizacja Monarchist\xf3w Polskich'
language = 'pl'
oldest_article = 7
__author__ = u'Tomasz D\u0142ugosz'
max_articles_per_feed = 100
cover_url = 'http://www.legitymizm.org/img_omp/logo.gif'
no_stylesheets = True
feeds = [(u'Aktualno\u015bci i publicystyka', u'http://www.legitymizm.org/rss.php')]
keep_only_tags = [dict(name='div', attrs={'id':'szeroka_kolumna'})]
remove_tags = [dict(name = 'div', attrs = {'class' : 'koniec_tresci_wlasciwej'}),
dict(name = 'div', attrs = {'class' : 'return'})]
extra_css = '''
body { font-family: Georgia, 'Times New Roman', Times, serif; }
h1 { color: #898981; font-weight: normal; font-size: 26px; letter-spacing: -1px; line-height: 23px; text-align: left; }
h2, h3 { font-weight: normal; font-size: 20px; line-height: 23px; letter-spacing: -1px; margin: 0 0 3px 0; text-align: left; }
#szeroka_kolumna { float: left; line-height: 20px; }
#szeroka_kolumna ul.wykaz { list-style-type: none; margin: 0 0 1.2em 0; padding: 0; }
#szeroka_kolumna ul.wykaz li.wykaz_2 { font-weight: bold; margin: 0.6em 0 0 0; }
#szeroka_kolumna ul.wykaz a { text-decoration: none; }
#szeroka_kolumna ul.wykaz li.wykaz_1, #szeroka_kolumna ul.wykaz li.wykaz_2 ul li { list-style-type: square; color: #898981; text-transform: none; font-weight: normal; padding: 0; }
#szeroka_kolumna ul.wykaz li.wykaz_1 { margin: 0 0 0 1.3em; }
#szeroka_kolumna ul.wykaz li.wykaz_2 ul { margin: 0; padding: 0 0 0 1.3em; }
#szeroka_kolumna h3.autor { background-color: #898981; color: #f9f9f8; margin: -25px 0px 30px 0; text-align: left; padding: 0 0 0 2px; }
.tresc_wlasciwa { border-top: 1px solid #898981; padding: 30px 0px 0px 0px; position: relative; }
#cytat { font-size: 11px; line-height: 19px; font-style: italic; text-align: justify; }
#cytat img { width: 100px; height: 105px; float: right; margin: 3px 0 0 10px; }
.duzy_cytat { padding: 20px 20px 10px 20px; margin: 0 0 1.2em 0; }
#szeroka_kolumna img, #szeroka_kolumna object { padding: 3px; border: 1px solid #898981; }
#szeroka_kolumna img.ilustracja { margin: 0px 10px 0 0; float: left; }
p { margin: 0 0 1.2em 0; }
#cytat p.sentencja { margin: 0; }
#cytat p.sentencja:first-letter { font-size: 44px; line-height: 33px; margin: 0 2px 0 0; font-style: normal; float: left; display: block; }
p.autor { text-transform: uppercase; color: #898981; font-style: normal; text-align: left; }
'''


@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
michalkiewicz.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
#
class michalkiewicz(BasicNewsRecipe):
title = u'Stanis\u0142aw Michalkiewicz'
description = u'Strona autorska * felietony * artyku\u0142y * komentarze'
__author__ = u'Tomasz D\u0142ugosz'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'class':'modul_srodek'})]
remove_tags = [dict(name='ul', attrs={'class':'menu'})]
feeds = [(u'Teksty', u'http://www.michalkiewicz.pl/rss.xml')]


@ -0,0 +1,35 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
nczas.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
#
class NCzas(BasicNewsRecipe):
title = u'Najwy\u017cszy Czas!'
description = u'Najwy\u017cszy Czas!\nwydanie internetowe'
__author__ = u'Tomasz D\u0142ugosz'
language = 'pl'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://nczas.com/wp-content/themes/default/grafika/logo.png'
keep_only_tags = [dict(name='div', attrs={'class':'trescartykulu'})]
feeds = [(u'Najwy\u017cszy Czas!', u'http://nczas.com/feed/')]
def postprocess_html(self, soup, first):
for tag in soup.findAll(name= 'img', alt=""):
tag.extract()
for item in soup.findAll(align = "right"):
del item['align']
return soup


@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
dict(name=['script', 'noscript', 'style'])]
encoding = decode
no_stylesheets = True
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
extra_css = 'h1 {font-family:sans-serif; font-size:2em; font-weight:bold;}\n.byline {font:monospace;}\n.bold {font-weight:bold;}'
def get_browser(self):
br = BasicNewsRecipe.get_browser()


@ -15,14 +15,14 @@ class Pagina12(BasicNewsRecipe):
publisher = 'La Pagina S.A.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
conversion_options = {
'comment' : description
@ -45,7 +45,9 @@ class Pagina12(BasicNewsRecipe):
,(u'NO' , u'http://www.pagina12.com.ar/diario/rss/no.xml' )
,(u'Las/12' , u'http://www.pagina12.com.ar/diario/rss/las12.xml' )
,(u'Soy' , u'http://www.pagina12.com.ar/diario/rss/soy.xml' )
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
,(u'Futuro' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/m2.xml' )
,(u'Rosario/12' , u'http://www.pagina12.com.ar/diario/rss/rosario.xml' )
]
def print_version(self, url):
@ -60,3 +62,7 @@ class Pagina12(BasicNewsRecipe):
return image['src']
return None
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup


@ -0,0 +1,56 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.que-leer.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class QueLeer(BasicNewsRecipe):
title = 'Que Leer'
__author__ = 'Darko Miletic'
description = 'Libros, Novedades en libros, Criticas, Noticias libro'
publisher = 'MC Ediciones, S.A.'
category = 'news, books, criticas, libros'
oldest_article = 7
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
masthead_url = 'http://www.que-leer.com/wp-content/themes/queleer/images/backgrounds/que-leer.jpg'
extra_css = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
preprocess_regexps = [(re.compile(r'<h2 class="izq">.*?</body>', re.DOTALL|re.IGNORECASE),lambda match: '')]
remove_tags = [
dict(attrs={'class':['post-ratings','post-ratings-loading','sociable','toc-anchor']})
,dict(name=['object','embed','iframe','link'])
,dict(attrs={'id':'left'})
]
remove_tags_after = dict(attrs={'class':'sociable'})
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'post'})]
feeds = [(u'Articulos', u'http://www.que-leer.com/feed')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
url = 'http://www.que-leer.com/comprar-libros-tienda-que-leer/libros-recomendados'
fitem = soup.find('a',href=url)
if fitem:
par = fitem.parent
par.extract()
return self.adeify_images(soup)


@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
radikal.com.tr
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Radikal_tr(BasicNewsRecipe):
title = 'Radikal - Turkey'
__author__ = 'Darko Miletic'
description = 'News from Turkey'
publisher = 'radikal'
category = 'news, politics, Turkey'
oldest_article = 2
max_articles_per_feed = 150
no_stylesheets = True
encoding = 'cp1254'
use_embedded_content = False
masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg'
language = 'tr'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['embed','iframe','object','link','base'])]
remove_tags_before = dict(name='h1')
remove_tags_after = dict(attrs={'id':'haberDetayYazi'})
feeds = [(u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml')]
def print_version(self, url):
articleid = url.rpartition('ArticleID=')[2]
return 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID=' + articleid
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -7,10 +7,11 @@ sfgate.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class SanFranciscoChronicle(BasicNewsRecipe):
title = u'San Francisco Chronicle'
__author__ = u'Darko Miletic'
__author__ = u'Darko Miletic and Sujata Raman'
description = u'San Francisco news'
language = 'en'
@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
remove_tags_before = {'class':'articleheadings'}
remove_tags_after = dict(name='div', attrs={'id':'articlecontent' })
remove_tags = [
dict(name='div', attrs={'class':'tools tools_top'})
,dict(name='div', attrs={'id':'articlebox' })
]
remove_tags_before = {'id':'printheader'}
remove_tags = [
dict(name='div',attrs={'id':'printheader'})
,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
,dict(name='div',attrs={'id':'footer'})
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
.byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.dtlcomment{font-style:italic;}
.georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
'''
feeds = [
(u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
]
def print_version(self,url):
url= url +"&type=printable"
return url
def get_article_url(self, article):
print str(article['title_detail']['value'])
url = article.get('guid',None)
url = "http://www.sfgate.com/cgi-bin/article.cgi?f="+url
if "Presented By:" in str(article['title_detail']['value']):
url = ''
return url


@ -0,0 +1,107 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.sueddeutsche.de/sz/
'''
import urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class SueddeutcheZeitung(BasicNewsRecipe):
title = 'Sueddeutsche Zeitung'
__author__ = 'Darko Miletic'
description = 'News from Germany. Access to paid content.'
publisher = 'Sueddeutsche Zeitung'
category = 'news, politics, Germany'
no_stylesheets = True
oldest_article = 2
encoding = 'cp1252'
needs_subscription = True
remove_empty_feeds = True
PREFIX = 'http://www.sueddeutsche.de'
INDEX = PREFIX + strftime('/sz/%Y-%m-%d/')
LOGIN = PREFIX + '/app/lbox/index.html'
use_embedded_content = False
masthead_url = 'http://pix.sueddeutsche.de/img/g_.gif'
language = 'de'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
remove_attributes = ['height','width']
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None:
data = urllib.urlencode({ 'login_name':self.username
,'login_passwort':self.password
,'lboxaction':'doLogin'
,'passtxt':'Passwort'
,'referer':self.INDEX
,'x':'22'
,'y':'7'
})
br.open(self.LOGIN,data)
return br
remove_tags =[
dict(attrs={'class':'hidePrint'})
,dict(name=['link','object','embed','base','iframe'])
]
remove_tags_before = dict(name='h2')
remove_tags_after = dict(attrs={'class':'author'})
feeds = [
(u'Politik' , INDEX + 'politik/' )
,(u'Seite drei' , INDEX + 'seitedrei/' )
,(u'Meinungsseite', INDEX + 'meinungsseite/')
,(u'Wissen' , INDEX + 'wissen/' )
,(u'Panorama' , INDEX + 'panorama/' )
,(u'Feuilleton' , INDEX + 'feuilleton/' )
,(u'Medien' , INDEX + 'medien/' )
,(u'Wirtschaft' , INDEX + 'wirtschaft/' )
,(u'Sport' , INDEX + 'sport/' )
,(u'Bayern' , INDEX + 'bayern/' )
,(u'Muenchen' , INDEX + 'muenchen/' )
,(u'jetzt.de' , INDEX + 'jetzt.de/' )
]
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
tbl = soup.find(attrs={'class':'szprintd'})
for item in tbl.findAll(name='td',attrs={'class':'topthema'}):
atag = item.find(attrs={'class':'Titel'}).a
ptag = item.find('p')
stag = ptag.find('script')
if stag:
stag.extract()
url = self.PREFIX + atag['href']
title = self.tag_to_string(atag)
description = self.tag_to_string(ptag)
articles.append({
'title' :title
,'date' :strftime(self.timefmt)
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return url + 'print.html'


@ -9,8 +9,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TelegraphUK(BasicNewsRecipe):
title = u'Telegraph.co.uk'
__author__ = 'Darko Miletic'
description = 'News from United Kingdom'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'News from United Kingdom'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
@ -18,23 +18,26 @@ class TelegraphUK(BasicNewsRecipe):
use_embedded_content = False
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444}
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
a{color:#234B7B; }
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
'''
keep_only_tags = [
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444;}
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
a{color:#234B7B; }
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
'''
keep_only_tags = [
dict(name='div', attrs={'class':'storyHead'})
,dict(name='div', attrs={'class':'story' })
#,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
]
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder"]})]
feeds = [
#,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
]
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
#,dict(name='div', attrs={'class':['toolshideoneQuarter']})
,dict(name='span', attrs={'class':['num','placeComment']})
]
feeds = [
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
@ -45,15 +48,27 @@ class TelegraphUK(BasicNewsRecipe):
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
]
]
def get_article_url(self, article):
url = article.get('guid', None)
if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
url = None
return url
def postprocess_html(self,soup,first):
for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
for pTag in bylineTag.findAll(name='p'):
if getattr(pTag.contents[0],"Comments",True):
pTag.extract()
return soup


@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TorontoSun(BasicNewsRecipe):
title = 'Toronto SUN'
__author__ = 'Darko Miletic'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'News from Canada'
publisher = 'Toronto Sun'
category = 'news, politics, Canada'
@ -21,25 +21,50 @@ class TorontoSun(BasicNewsRecipe):
encoding = 'cp1252'
language = 'en_CA'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags =[
dict(name='div', attrs={'class':'articleHead'})
,dict(name='div', attrs={'id':'channelContent'})
]
remove_tags = [
dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']})
,dict(name=['link','iframe','object'])
,dict(name='a',attrs={'rel':'swap'})
,dict(name='ul',attrs={'class':'tabs dl contentSwap'})
]
keep_only_tags = [
dict(name='div', attrs={'class':['articleHead','leftBox']})
,dict(name='div', attrs={'id':'channelContent'})
,dict(name='div', attrs={'id':'rotateBox'})
,dict(name='img')
]
remove_tags = [
dict(name='div',attrs={'class':['bottomBox clear','bottomBox','breadCrumb','articleControls thin','articleControls thin short','extraVideoList']})
,dict(name='h2',attrs={'class':'microhead'})
,dict(name='div',attrs={'id':'commentsBottom'})
,dict(name=['link','iframe','object'])
,dict(name='a',attrs={'rel':'swap'})
,dict(name='a',attrs={'href':'/news/haiti/'})
,dict(name='ul',attrs={'class':['tabs dl contentSwap','micrositeNav clearIt hList','galleryNav rotateNav']})
]
remove_tags_after = [
dict(name='div',attrs={'class':'bottomBox clear'})
,dict(name='div',attrs={'class':'rotateBox'})
,dict(name='div',attrs={'id':'contentSwap'})
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
.bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;}
.subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;}
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;}
.updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
.galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
'''
remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'})
feeds = [
(u'News' , u'http://www.torontosun.com/news/rss.xml' )
@ -48,3 +73,19 @@ class TorontoSun(BasicNewsRecipe):
,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' )
,(u'Money' , u'http://www.torontosun.com/money/rss.xml' )
]
def preprocess_html(self, soup):
##To fetch images from the specified source
for img in soup.findAll('img', src=True):
url= img.get('src').split('?')[-1].partition('=')[-1]
if url:
img['src'] = url.split('&')[0].partition('=')[0]
img['width'] = url.split('&')[-1].partition('=')[-1].split('x')[0]
img['height'] = url.split('&')[-1].partition('=')[-1].split('x')[1]
return soup


@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.variety.com
'''
@ -20,8 +18,10 @@ class Variety(BasicNewsRecipe):
publisher = 'Red Business Information'
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
language = 'en'
masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
conversion_options = {
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
@ -31,7 +31,7 @@ class Variety(BasicNewsRecipe):
remove_tags = [dict(name=['object','link','map'])]
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
def print_version(self, url):
@ -41,6 +41,6 @@ class Variety(BasicNewsRecipe):
catid = catidr.partition('&')[0]
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
def get_article_url(self, article):
return article.get('feedburner_origlink', None)
def preprocess_html(self, soup):
return self.adeify_images(soup)


@ -0,0 +1,95 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.nbcolympics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Olympics_2010(BasicNewsRecipe):
title = u'NBC Olympics 2010'
__author__ = 'Starson17'
description = 'Olympics 2010'
cover_url = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg'
publisher = 'Olympics 2010'
tags = 'Olympics news'
language = 'en'
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
# recursions = 3
oldest_article = 7
max_articles_per_feed = 10
keep_only_tags = [dict(name='div', attrs={'class':['Article ','ArticleGallery']}),
]
remove_tags = [dict(name='div', attrs={'id':['RelatedTagsBox','ShareBox']}),
dict(name='div', attrs={'class':['DateUtilities','PhotoGallery BoxRight','Frame','ToolBox']}),
]
# RSS feeds are at: http://www.nbcolympics.com/rss/index.html
feeds = [
('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'),
('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'),
('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'),
# ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'),
# ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'),
# ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'),
('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'),
# ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'),
# ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'),
# ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
# ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
# ('NBCOlympics.com - Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'),
# ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'),
('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'),
# ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'),
# ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'),
('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'),
# ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'),
# ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'),
('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'),
# ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'),
# ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'),
('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'),
# ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'),
# ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'),
('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'),
# ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'),
# ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'),
('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'),
# ('NBCOlympics.com - Hockey - Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'),
# ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'),
('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'),
# ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'),
# ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'),
('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'),
# ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'),
# ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'),
('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'),
# ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'),
# ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'),
('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'),
# ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'),
# ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'),
('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'),
# ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'),
# ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'),
('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'),
# ('NBCOlympics.com - Snowboarding - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'),
# ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'),
('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'),
# ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
# ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -20,6 +20,7 @@ class Wired(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
masthead_url = 'http://www.wired.com/images/home/wired_logo.gif'
language = 'en'
extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
index = 'http://www.wired.com/magazine/'
@ -38,14 +39,34 @@ class Wired(BasicNewsRecipe):
dict(name=['object','embed','iframe','link'])
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
]
remove_attributes = ['height','width']
#feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
def parse_index(self):
totalfeeds = []
soup = self.index_to_soup(self.index)
soup = self.index_to_soup(self.index)
majorf = soup.find('div',attrs={'class':'index'})
if majorf:
pfarticles = []
firsta = majorf.find(attrs={'class':'spread-header'})
if firsta:
pfarticles.append({
'title' :self.tag_to_string(firsta.a)
,'date' :strftime(self.timefmt)
,'url' :'http://www.wired.com' + firsta.a['href']
,'description':''
})
for itt in majorf.findAll('li'):
itema = itt.find('a',href=True)
if itema:
pfarticles.append({
'title' :self.tag_to_string(itema)
,'date' :strftime(self.timefmt)
,'url' :'http://www.wired.com' + itema['href']
,'description':''
})
totalfeeds.append(('Cover', pfarticles))
features = soup.find('div',attrs={'id':'my-glider'})
if features:
farticles = []

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class Wired_Daily(BasicNewsRecipe):
title = 'Wired Daily Edition'
__author__ = 'Kovid Goyal'
description = 'Technology news'
timefmt = ' [%Y%b%d %H%M]'
language = 'en'
no_stylesheets = True
remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
{'class':'entryActions'},
dict(name=['noscript', 'script'])]
feeds = [
('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'),
('Software', 'http://feeds.wired.com/wired/software'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
('Cars', 'http://feeds.wired.com/wired/cars'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
('Gaming', 'http://feeds.wired.com/wired/gaming'),
('Science', 'http://feeds.wired.com/wired/science'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
('Politics', 'http://feeds.wired.com/wired/politics'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
('Commentary', 'http://feeds.wired.com/wired/commentary'),
]
def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')

View File

@ -0,0 +1,74 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.wired.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Wired_UK(BasicNewsRecipe):
title = 'Wired Magazine - UK edition'
__author__ = 'Darko Miletic'
description = 'Gaming news'
publisher = 'Conde Nast Digital'
category = 'news, games, IT, gadgets'
oldest_article = 32
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
language = 'en_GB'
extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '
index = 'http://www.wired.co.uk/wired-magazine.aspx'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':'article-box'})]
remove_tags = [
dict(name=['object','embed','iframe','link'])
,dict(attrs={'class':['opts','comment','stories']})
]
remove_tags_after = dict(name='div',attrs={'class':'stories'})
remove_attributes = ['height','width']
def parse_index(self):
totalfeeds = []
soup = self.index_to_soup(self.index)
maincontent = soup.find('div',attrs={'class':'main-content'})
mfeed = []
if maincontent:
st = maincontent.find(attrs={'class':'most-wired-box'})
if st:
for itt in st.findAll('a',href=True):
url = 'http://www.wired.co.uk' + itt['href']
title = self.tag_to_string(itt)
description = ''
date = strftime(self.timefmt)
mfeed.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append(('Articles', mfeed))
return totalfeeds
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.index)
cover_item = soup.find('span', attrs={'class':'cover'})
if cover_item:
cover_url = cover_item.img['src']
return cover_url
def print_version(self, url):
return url + '?page=all'

View File

@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe):
br.select_form(nr=0)
br['user'] = self.username
br['password'] = self.password
br.submit()
res = br.submit()
raw = res.read()
if 'Welcome,' not in raw:
raise ValueError('Failed to log in to wsj.com, check your '
'username and password')
return br
def postprocess_html(self, soup, first):
@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe):
soup = self.wsj_get_index()
year = strftime('%Y')
for x in soup.findAll('td', attrs={'class':'b14'}):
for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
txt = self.tag_to_string(x).strip()
txt = txt.replace(u'\xa0', ' ')
txt = txt.encode('ascii', 'ignore')
if year in txt:
self.timefmt = ' [%s]'%txt
break

resources/tanea.recipe (new file, 30 lines)
View File

@ -0,0 +1,30 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class TaNea(BasicNewsRecipe):
title = u'Ta Nea'
__author__ = 'Pan'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
remove_tags_before = dict(name='div',attrs={'id':'print-body'})
remove_tags_after = dict(name='div',attrs={'id':'text'})
feeds = [
(u'\xce\x95\xce\xbb\xce\xbb\xce\xac\xce\xb4\xce\xb1',
u'http://www.tanea.gr/default.asp?pid=66&la=1'),
(u'\xce\x9a\xcf\x8c\xcf\x83\xce\xbc\xce\xbf\xcf\x82',
u'http://www.tanea.gr/default.asp?pid=67&la=1'),
(u'\xce\x9f\xce\xb9\xce\xba\xce\xbf\xce\xbd\xce\xbf\xce\xbc\xce\xaf\xce\xb1',
u'http://www.tanea.gr/default.asp?pid=68&la=1'),
(u'\xce\xa0\xce\xbf\xce\xbb\xce\xb9\xcf\x84\xce\xb9\xcf\x83\xce\xbc\xcf\x8c\xcf\x82',
u'http://www.tanea.gr/default.asp?pid=69&la=1'),
(u'\xce\x93\xce\xbd\xcf\x8e\xce\xbc\xce\xb5\xcf\x82',
u'http://www.tanea.gr/default.asp?pid=79&la=1'),
(u'\xce\xa1\xce\xb9\xcf\x80\xce\xad\xcf\x82',
u'http://www.tanea.gr/default.asp?pid=80&la=1'),
(u'\xce\x91\xce\xb9\xcf\x87\xce\xbc\xce\xad\xcf\x82',
u'http://www.tanea.gr/default.asp?pid=81&la=1')]
def print_version(self, url):
return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')

View File

@ -20,37 +20,8 @@ function selector(elem) {
return sel;
}
function find_closest_enclosing_block(top) {
var START = top-1000;
var STOP = top;
var matches = [];
var elem, temp;
var width = 1000;
for (y = START; y < STOP; y += 20) {
for ( x = 0; x < width; x += 20) {
elem = document.elementFromPoint(x, y);
try {
elem = $(elem);
temp = elem.offset().top
matches.push(elem);
if (Math.abs(temp - START) < 25) { y = STOP; break}
} catch(error) {}
}
}
var miny = Math.abs(matches[0].offset().top - START), min_elem = matches[0];
for (i = 1; i < matches.length; i++) {
elem = matches[i];
temp = Math.abs(elem.offset().top - START);
if ( temp < miny ) { miny = temp; min_elem = elem; }
}
return min_elem;
}
function calculate_bookmark(y) {
var elem = find_closest_enclosing_block(y);
function calculate_bookmark(y, node) {
var elem = $(node);
var sel = selector(elem);
var ratio = (y - elem.offset().top)/elem.height();
if (ratio > 1) { ratio = 1; }

View File

@ -399,7 +399,7 @@ class BuildPDF2XML(Command):
objects.append(obj)
if self.newer(dest, objects):
cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
'-lpng', '-lpthread']
if iswindows:
cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()

View File

@ -137,8 +137,20 @@ class Develop(Command):
self.setup_mount_helper()
self.install_files()
self.run_postinstall()
self.install_env_module()
self.success()
def install_env_module(self):
import distutils.sysconfig as s
libdir = s.get_python_lib(prefix=self.opts.staging_root)
if os.path.exists(libdir):
path = os.path.join(libdir, 'init_calibre.py')
self.info('Installing calibre environment module: '+path)
with open(path, 'wb') as f:
f.write(HEADER.format(**self.template_args()))
else:
self.warn('Cannot install calibre environment module to: '+libdir)
def setup_mount_helper(self):
def warn():
self.warn('Failed to compile mount helper. Auto mounting of',
@ -180,13 +192,20 @@ class Develop(Command):
functions[typ]):
self.write_template(name, mod, func)
def template_args(self):
return {
'path':self.libdir,
'resources':self.sharedir,
'executables':self.bindir,
'extensions':self.j(self.libdir, 'calibre', 'plugins')
}
def write_template(self, name, mod, func):
template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
script = template.format(
module=mod, func=func,
path=self.libdir, resources=self.sharedir,
executables=self.bindir,
extensions=self.j(self.libdir, 'calibre', 'plugins'))
args = self.template_args()
args['module'] = mod
args['func'] = func
script = template.format(**args)
path = self.j(self.staging_bindir, name)
if not os.path.exists(self.staging_bindir):
os.makedirs(self.staging_bindir)

View File

@ -15,7 +15,7 @@ class Rsync(Command):
description = 'Sync source tree from development machine'
SYNC_CMD = ('rsync -avz --exclude src/calibre/plugins '
SYNC_CMD = ('rsync -avz --delete --exclude src/calibre/plugins '
'--exclude src/calibre/manual --exclude src/calibre/trac '
'--exclude .bzr --exclude .build --exclude .svn --exclude build --exclude dist '
'--exclude "*.pyc" --exclude "*.pyo" --exclude "*.swp" --exclude "*.swo" '

View File

@ -378,10 +378,11 @@ def strftime(fmt, t=None):
t = time.localtime()
early_year = t[0] < 1900
if early_year:
replacement = 1900 if t[0]%4 == 0 else 1901
fmt = fmt.replace('%Y', '_early year hack##')
t = list(t)
orig_year = t[0]
t[0] = 1900
t[0] = replacement
ans = None
if iswindows:
if isinstance(fmt, unicode):
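The early-year workaround in the hunk above can be sketched as a standalone function. This is an illustrative reimplementation, not calibre's actual `strftime` (which also handles Windows and unicode formats); the `@@YEAR@@` placeholder token and the function name are invented for the sketch:

```python
import time

def strftime_early(fmt, t):
    # Many C strftime implementations reject years before 1900, so swap in a
    # stand-in year, format, then splice the real year back into the result.
    t = list(t)
    early_year = t[0] < 1900
    if early_year:
        orig_year = t[0]
        # Mirror the patch: choose the stand-in year by divisibility by 4
        replacement = 1900 if orig_year % 4 == 0 else 1901
        fmt = fmt.replace('%Y', '@@YEAR@@')  # invented placeholder token
        t[0] = replacement
    ans = time.strftime(fmt, tuple(t))
    if early_year:
        ans = ans.replace('@@YEAR@@', '%04d' % orig_year)
    return ans
```

For example, `strftime_early('%Y-%m-%d', (1888, 2, 20, 12, 0, 0, 1, 51, -1))` produces `'1888-02-20'`, because `%Y` is replaced before formatting and the real year is substituted back afterwards.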

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.37'
__version__ = '0.6.42'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -139,12 +139,11 @@ class FileTypePlugin(Plugin):
#: to the database
on_import = False
#: If True, this plugin is run whenever an any2* tool
#: is used, on the file passed to the any2* tool.
#: If True, this plugin is run just before a conversion
on_preprocess = False
#: If True, this plugin is run after an any2* tool is
#: used, on the final file produced by the tool.
#: If True, this plugin is run after conversion
#: on the final file produced by the conversion output plugin.
on_postprocess = False
type = _('File type')
@ -249,6 +248,7 @@ class CatalogPlugin(Plugin):
#: dest = 'catalog_title',
#: help = (_('Title of generated catalog. \nDefault:') + " '" +
#: '%default' + "'"))]
#: cli_options parsed in library.cli:catalog_option_parser()
cli_options = []
@ -275,9 +275,10 @@ class CatalogPlugin(Plugin):
def get_output_fields(self, opts):
# Return a list of requested fields, with opts.sort_by first
all_fields = set(
['author_sort','authors','comments','cover','formats', 'id','isbn','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp',
'title','uuid'])
['author_sort','authors','comments','cover','formats',
'id','isbn','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp',
'title','uuid'])
fields = all_fields
if opts.fields != 'all':

View File

@ -7,6 +7,7 @@ import os
import glob
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract
class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP'
@ -404,9 +405,10 @@ from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX
from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
BOOQ
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
@ -418,11 +420,11 @@ from calibre.devices.eslick.driver import ESLICK
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516
from calibre.devices.hanvon.driver import N516, EB511
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins += [
ComicInput,
EPUBInput,
@ -464,6 +466,7 @@ plugins += [
CYBOOK,
ILIAD,
IREXDR1000,
IREXDR800,
JETBOOK,
SHINEBOOK,
POCKETBOOK360,
@ -487,9 +490,11 @@ plugins += [
DBOOK,
INVESBOOK,
BOOX,
BOOQ,
EB600,
README,
N516,
EB511,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]

View File

@ -20,7 +20,7 @@ class ANDROID(USBMS):
VENDOR_ID = {
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
0x22b8 : { 0x41d9 : [0x216]},
0x18d1 : { 0x4e11 : [0x0100]},
0x18d1 : { 0x4e11 : [0x0100], 0x4e12: [0x0100]},
}
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '

View File

@ -184,3 +184,14 @@ class INVESBOOK(EB600):
VENDOR_NAME = 'INVES_E6'
WINDOWS_MAIN_MEM = '00INVES_E600'
WINDOWS_CARD_A_MEM = '00INVES_E600'
class BOOQ(EB600):
name = 'Booq Device Interface'
gui_name = 'Booq'
FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'pdf', 'doc', 'rtf', 'txt', 'html']
VENDOR_NAME = 'NETRONIX'
WINDOWS_MAIN_MEM = 'EB600'
WINDOWS_CARD_A_MEM = 'EB600'

View File

@ -126,3 +126,15 @@ class BOOX(HANLINV3):
EBOOK_DIR_MAIN = 'MyBooks'
EBOOK_DIR_CARD_A = 'MyBooks'
def windows_sort_drives(self, drives):
main = drives.get('main', None)
card = drives.get('carda', None)
if card and main and card < main:
drives['main'] = card
drives['carda'] = main
return drives
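The `windows_sort_drives` hook above (addressing the Boox main-memory/SD-card swap mentioned in the release notes) can be exercised on its own; here `drives` is simply a dict mapping `'main'`/`'carda'` to Windows drive letters, as in the driver:

```python
def windows_sort_drives(drives):
    # If Windows assigned the card a drive letter that sorts before main
    # memory, the two were detected in the wrong order: swap them back.
    main = drives.get('main', None)
    card = drives.get('carda', None)
    if card and main and card < main:
        drives['main'] = card
        drives['carda'] = main
    return drives
```

For example, `windows_sort_drives({'main': 'F:', 'carda': 'E:'})` returns `{'main': 'E:', 'carda': 'F:'}`, while an already-correct mapping is returned unchanged.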

View File

@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Hanvon devices
'''
import re
from calibre.devices.usbms.driver import USBMS
@ -32,3 +33,25 @@ class N516(USBMS):
EBOOK_DIR_MAIN = 'e_book'
SUPPORTS_SUB_DIRS = True
class EB511(USBMS):
name = 'Elonex EB 511 driver'
gui_name = 'EB 511'
description = _('Communicate with the Elonex EB 511 eBook reader.')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = ['epub', 'html', 'pdf', 'txt']
VENDOR_ID = [0x45e]
PRODUCT_ID = [0xffff]
BCD = [0x0]
MAIN_MEMORY_VOLUME_LABEL = 'EB 511 Internal Memory'
EBOOK_DIR_MAIN = 'e_book'
SUPPORTS_SUB_DIRS = True
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')

View File

@ -36,3 +36,14 @@ class IREXDR1000(USBMS):
EBOOK_DIR_MAIN = 'ebooks'
DELETE_EXTS = ['.mbp']
SUPPORTS_SUB_DIRS = True
class IREXDR800(IREXDR1000):
name = 'IRex Digital Reader 800 Device Interface'
description = _('Communicate with the IRex Digital Reader 800')
PRODUCT_ID = [0x002]
WINDOWS_MAIN_MEM = 'DR800'
FORMATS = ['epub', 'html', 'pdf', 'txt']
EBOOK_DIR_MAIN = 'Books'
DELETE_EXTS = []

View File

@ -192,17 +192,15 @@ class PRS505(CLI, Device):
fix_ids(*booklists)
if not os.path.exists(self._main_prefix):
os.makedirs(self._main_prefix)
f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
booklists[0].write(f)
f.close()
with open(self._main_prefix + self.__class__.MEDIA_XML, 'wb') as f:
booklists[0].write(f)
def write_card_prefix(prefix, listid):
if prefix is not None and hasattr(booklists[listid], 'write'):
if not os.path.exists(prefix):
os.makedirs(prefix)
f = open(prefix + self.__class__.CACHE_XML, 'wb')
booklists[listid].write(f)
f.close()
with open(prefix + self.__class__.CACHE_XML, 'wb') as f:
booklists[listid].write(f)
write_card_prefix(self._card_a_prefix, 1)
write_card_prefix(self._card_b_prefix, 2)

View File

@ -4,8 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import shutil
import os, shutil, time
from calibre.devices.errors import PathError
@ -50,11 +49,12 @@ class CLI(object):
d = os.path.dirname(path)
if not os.path.exists(d):
os.makedirs(d)
with open(path, 'wb') as dest:
with open(path, 'w+b') as dest:
try:
shutil.copyfileobj(infile, dest)
except IOError:
print 'WARNING: First attempt to send file to device failed, retrying'
time.sleep(0.2)
infile.seek(0)
dest.seek(0)
dest.truncate()
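The retry logic above, which opens the destination `w+b` so a failed first copy can be rewound, truncated, and attempted again, can be sketched as a self-contained helper (a hypothetical `copy_with_retry`, not the actual `CLI.put_file` signature):

```python
import shutil
import time

def copy_with_retry(infile, path):
    # Open read/write so that after a failed first attempt the partially
    # written destination can be truncated in place instead of reopened.
    with open(path, 'w+b') as dest:
        try:
            shutil.copyfileobj(infile, dest)
        except IOError:
            time.sleep(0.2)  # give the device a moment to recover
            infile.seek(0)
            dest.seek(0)
            dest.truncate()
            shutil.copyfileobj(infile, dest)
```

Note that the source stream must be seekable for the `infile.seek(0)` rewind to work, which holds for the on-disk files the device drivers pass in.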

View File

@ -70,6 +70,19 @@ def extract_cover_from_embedded_svg(html, base, log):
if href and os.access(path, os.R_OK):
return open(path, 'rb').read()
def extract_calibre_cover(raw, base, log):
from calibre.ebooks.BeautifulSoup import BeautifulSoup
soup = BeautifulSoup(raw)
matches = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
'font', 'br'])
images = soup.findAll('img')
if matches is None and len(images) == 1 and \
images[0].get('alt', '')=='cover':
img = images[0]
img = os.path.join(base, *img['src'].split('/'))
if os.path.exists(img):
return open(img, 'rb').read()
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
from calibre.ebooks.oeb.base import SVG_NS
raw = open(path_to_html, 'rb').read()
@ -80,6 +93,11 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
os.path.dirname(path_to_html), log)
except:
pass
if data is None:
try:
data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
except:
pass
if data is None:
renderer = render_html(path_to_html, width, height)
data = getattr(renderer, 'data', None)

View File

@ -0,0 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Used for chm input
'''

View File

@ -0,0 +1,384 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, shutil, uuid
from tempfile import mkdtemp
from mimetypes import guess_type as guess_mimetype
from BeautifulSoup import BeautifulSoup
from lxml import html
from pychm.chm import CHMFile
from pychm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate,
)
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
def match_string(s1, s2_already_lowered):
if s1 is not None and s2_already_lowered is not None:
if s1.lower()==s2_already_lowered:
return True
return False
def option_parser():
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help=_("Set the book title"))
parser.add_option('--title-sort', action='store', type='string', default=None,
dest='title_sort', help=_('Set sort key for the title'))
parser.add_option("-a", "--author", action="store", type="string", \
dest="author", help=_("Set the author"))
parser.add_option('--author-sort', action='store', type='string', default=None,
dest='author_sort', help=_('Set sort key for the author'))
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help=_("The category this book belongs"
" to. E.g.: History"))
parser.add_option("--thumbnail", action="store", type="string", \
dest="thumbnail", help=_("Path to a graphic that will be"
" set as this files' thumbnail"))
parser.add_option("--comment", action="store", type="string", \
dest="freetext", help=_("Path to a txt file containing a comment."))
parser.add_option("--get-thumbnail", action="store_true", \
dest="get_thumbnail", default=False, \
help=_("Extract thumbnail from LRF file"))
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
parser.add_option('--classification', default=None, help=_('Set the book classification'))
parser.add_option('--creator', default=None, help=_('Set the book creator'))
parser.add_option('--producer', default=None, help=_('Set the book producer'))
parser.add_option('--get-cover', action='store_true', default=False,
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
parser.add_option('--bookid', action='store', type='string', default=None,
dest='book_id', help=_('Set book ID'))
parser.add_option('--font-delta', action='store', type='int', default=0,
dest='font_delta', help=_('Set font delta'))
return parser
class CHMError(Exception):
pass
class CHMReader(CHMFile):
def __init__(self, input, log):
CHMFile.__init__(self)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self._sourcechm = input
self._contents = None
self._playorder = 0
self._metadata = False
self._extracted = False
# location of '.hhc' file, which is the CHM TOC.
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc"
def _parse_toc(self, ul, basedir=os.getcwdu()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1
for li in ul('li', recursive=False):
href = li.object('param', {'name': 'Local'})[0]['value']
if href.count('#'):
href, frag = href.split('#')
else:
frag = None
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
#print "========>", name
toc.add_item(href, frag, name, play_order=self._playorder)
self._playorder += 1
if li.ul:
child = self._parse_toc(li.ul)
child.parent = toc
toc.append(child)
#print toc
return toc
def GetFile(self, path):
# ResolveObject requires absolute paths, but Contents() deliberately
# makes them relative, so simply re-add the leading /.
# note this path refers to the internal CHM structure
if path[0] != '/':
path = '/' + path
res, ui = self.ResolveObject(path)
if res != CHM_RESOLVE_SUCCESS:
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
size, data = self.RetrieveObject(ui)
if size == 0:
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data
def ExtractFiles(self, output_dir=os.getcwdu()):
for path in self.Contents():
lpath = os.path.join(output_dir, path)
self._ensure_dir(lpath)
data = self.GetFile(path)
with open(lpath, 'wb') as f:
if guess_mimetype(path)[0] == ('text/html'):
data = self._reformat(data)
f.write(data)
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
self._extracted = True
def _reformat(self, data):
try:
soup = BeautifulSoup(data)
except UnicodeEncodeError:
# hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :("
return data
# nuke javascript...
[s.extract() for s in soup('script')]
# remove forward and back nav bars from the top/bottom of each page
# since they disrupt the flow of the text and generally waste space
# since we can't use [a,b] syntax to select arbitrary items from a list
# we'll have to do this manually...
t = soup('table')
if t:
if (t[0].previousSibling is None
or t[0].previousSibling.previousSibling is None):
t[0].extract()
if (t[-1].nextSibling is None
or t[-1].nextSibling.nextSibling is None):
t[-1].extract()
# for some very odd reason each page's content appears to be in a table
# too. and this table has sub-tables for random asides... grr.
# some images seem to be broken in some chm's :/
for img in soup('img'):
try:
# some are supposedly "relative"... lies.
while img['src'].startswith('../'): img['src'] = img['src'][3:]
# some have ";<junk>" at the end.
img['src'] = img['src'].split(';')[0]
except KeyError:
# and some don't even have a src= ?!
pass
# now give back some pretty html.
return soup.prettify()
def Contents(self):
if self._contents is not None:
return self._contents
paths = []
def get_paths(chm, ui, ctx):
# skip directories
# note this path refers to the internal CHM structure
if ui.path[-1] != '/':
# and make paths relative
paths.append(ui.path.lstrip('/'))
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
self._contents = paths
return self._contents
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)
class CHMInput(InputFormatPlugin):
name = 'CHM Input'
author = 'Kovid Goyal and Alex Bramley'
description = 'Convert CHM files to OEB'
file_types = set(['chm'])
options = set([
OptionRecommendation(name='dummy_option', recommended_value=False,
help=_('dummy option until real options are determined.')),
])
def _chmtohtml(self, output_dir, chm_path, no_images, log):
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log)
log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir)
return rdr.hhc_path
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.metadata.chm import get_metadata_
log.debug('Processing CHM...')
tdir = mkdtemp(prefix='chm2oeb_')
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
no_images = False #options.no_images
chm_name = stream.name
#chm_data = stream.read()
#closing stream so CHM can be opened by external library
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname)
metadata = get_metadata_(tdir)
odi = options.debug_pipeline
options.debug_pipeline = None
# try a custom conversion:
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
# try using html converter:
htmlpath = self._create_html_root(mainpath, log)
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
options.debug_pipeline = odi
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
shutil.rmtree(tdir)
return oeb
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
# use HTMLInput plugin to generate book
from calibre.ebooks.html.input import HTMLInput
opts.breadth_first = True
htmlinput = HTMLInput(None)
oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
return oeb
def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer
oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False)
self.oeb = oeb
metadata = oeb.metadata
if mi.title:
metadata.add('title', mi.title)
if mi.authors:
for a in mi.authors:
metadata.add('creator', a, attrib={'role':'aut'})
if mi.publisher:
metadata.add('publisher', mi.publisher)
if mi.isbn:
metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
if not metadata.language:
oeb.logger.warn(u'Language not specified')
metadata.add('language', get_lang())
if not metadata.creator:
oeb.logger.warn('Creator not specified')
metadata.add('creator', _('Unknown'))
if not metadata.title:
oeb.logger.warn('Title not specified')
metadata.add('title', _('Unknown'))
bookid = str(uuid.uuid4())
metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
for ident in metadata.identifier:
if 'id' in ident.attrib:
self.oeb.uid = metadata.identifier[0]
break
hhcdata = self._read_file(hhcpath)
hhcroot = html.fromstring(hhcdata)
chapters = self._process_nodes(hhcroot)
#print "============================="
#print "Printing hhcroot"
#print etree.tostring(hhcroot, pretty_print=True)
#print "============================="
log.debug('Found %d section nodes' % len(chapters))
if len(chapters) > 0:
path0 = chapters[0][1]
subpath = os.path.dirname(path0)
htmlpath = os.path.join(basedir, subpath)
oeb.container = DirContainer(htmlpath, log)
for chapter in chapters:
title = chapter[0]
basename = os.path.basename(chapter[1])
self._add_item(oeb, title, basename)
oeb.container = DirContainer(htmlpath, oeb.log)
return oeb
def _create_html_root(self, hhcpath, log):
hhcdata = self._read_file(hhcpath)
hhcroot = html.fromstring(hhcdata)
chapters = self._process_nodes(hhcroot)
#print "============================="
#print "Printing hhcroot"
#print etree.tostring(hhcroot, pretty_print=True)
#print "============================="
log.debug('Found %d section nodes' % len(chapters))
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
f = open(htmlpath, 'wb')
f.write("<HTML><HEAD></HEAD><BODY>\r\n")
if chapters:
path0 = chapters[0][1]
subpath = os.path.dirname(path0)
for chapter in chapters:
title = chapter[0]
rsrcname = os.path.basename(chapter[1])
rsrcpath = os.path.join(subpath, rsrcname)
# title should already be url encoded
url = '<br /><a href="' + rsrcpath + '">' + title + ' </a>\r\n'
f.write(url)
f.write("</BODY></HTML>")
f.close()
return htmlpath
def _read_file(self, name):
f = open(name, 'rb')
data = f.read()
f.close()
return data
def _visit_node(self, node, chapters, depth):
# check that node is a normal node (not a comment, DOCTYPE, etc.)
# (normal nodes have string tags)
if isinstance(node.tag, basestring):
chapter_title = chapter_path = None # guard against sitemap objects missing name/local params
if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
for child in node:
if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):
chapter_title = child.attrib['value']
if match_string(child.tag,'param') and match_string(child.attrib['name'],'local'):
chapter_path = child.attrib['value']
if chapter_title is not None and chapter_path is not None:
chapter = [chapter_title, chapter_path, depth]
chapters.append(chapter)
if node.tag=="UL":
depth = depth + 1
if node.tag=="/UL":
depth = depth - 1
def _process_nodes(self, root):
chapters = []
depth = 0
for node in root.iter():
self._visit_node(node, chapters, depth)
return chapters
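The sitemap walk in `_visit_node`/`_process_nodes` boils down to collecting the `name` (title) and `local` (path) parameter pairs from `text/sitemap` objects. A minimal Python 3 sketch of that extraction, using the stdlib XML parser on an invented, well-formed HHC fragment instead of lxml:

```python
import xml.etree.ElementTree as ET

HHC_SNIPPET = """
<ul>
  <object type="text/sitemap">
    <param name="name" value="Chapter 1" />
    <param name="local" value="ch01.html" />
  </object>
</ul>
"""

def extract_chapters(root):
    # Mirror _visit_node: each text/sitemap OBJECT carries a 'name'
    # (title) and a 'local' (path) PARAM child.
    chapters = []
    for obj in root.iter('object'):
        if obj.get('type', '').lower() != 'text/sitemap':
            continue
        title = path = None
        for param in obj.iter('param'):
            if param.get('name', '').lower() == 'name':
                title = param.get('value')
            elif param.get('name', '').lower() == 'local':
                path = param.get('value')
        if title is not None and path is not None:
            chapters.append((title, path))
    return chapters

chapters = extract_chapters(ET.fromstring(HHC_SNIPPET))
```

Real HHC files are frequently malformed HTML, which is why the plugin itself parses them with `lxml.html` rather than a strict XML parser.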
def _add_item(self, oeb, title, path):
bname = os.path.basename(path)
id, href = oeb.manifest.generate(id='html',
href=ascii_filename(bname))
item = oeb.manifest.add(id, href, 'text/html')
item.html_input_href = bname
oeb.spine.add(item, True)
oeb.toc.add(title, item.href)


@ -233,14 +233,18 @@ def create_option_parser(args, log):
return parser, plumber
def abspath(x):
if x.startswith('http:') or x.startswith('https:'):
return x
return os.path.abspath(os.path.expanduser(x))
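The new `abspath` helper passes URLs through untouched so that options such as `--cover` can accept either a local file or a URL. A quick sketch of the intended behavior (example values are illustrative):

```python
import os

def abspath(x):
    # Pass URLs through untouched; expand and absolutize real paths.
    if x.startswith('http:') or x.startswith('https:'):
        return x
    return os.path.abspath(os.path.expanduser(x))

print(abspath('http://example.com/cover.jpg'))  # returned unchanged
print(os.path.isabs(abspath('cover.jpg')))      # True
```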
def main(args=sys.argv):
log = Log()
parser, plumber = create_option_parser(args, log)
opts = parser.parse_args(args)[0]
y = lambda q : os.path.abspath(os.path.expanduser(q))
for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None:
setattr(opts, x, y(getattr(opts, x)))
setattr(opts, x, abspath(getattr(opts, x)))
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
for n in parser.options_iter()


@ -12,6 +12,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
run_plugins_on_preprocess, run_plugins_on_postprocess
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.date import parse_date
from calibre import extract, walk
DEBUG_README=u'''
@ -65,7 +66,7 @@ class Plumber(object):
metadata_option_names = [
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
'publisher', 'series', 'series_index', 'rating', 'isbn',
'tags', 'book_producer', 'language'
'tags', 'book_producer', 'language', 'pubdate', 'timestamp'
]
def __init__(self, input, output, log, report_progress=DummyReporter(),
@ -423,7 +424,7 @@ OptionRecommendation(name='author_sort',
OptionRecommendation(name='cover',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the cover to the specified file.')),
help=_('Set the cover to the specified file or URL')),
OptionRecommendation(name='comments',
recommended_value=None, level=OptionRecommendation.LOW,
@ -461,6 +462,14 @@ OptionRecommendation(name='language',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the language.')),
OptionRecommendation(name='pubdate',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the publication date.')),
OptionRecommendation(name='timestamp',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the book timestamp (used by the date column in calibre).')),
]
input_fmt = os.path.splitext(self.input)[1]
@ -619,8 +628,30 @@ OptionRecommendation(name='language',
except ValueError:
self.log.warn(_('Values of series index and rating must'
' be numbers. Ignoring'), val)
continue
elif x in ('timestamp', 'pubdate'):
try:
val = parse_date(val, assume_utc=x=='pubdate')
except:
self.log.exception(_('Failed to parse date/time') + ' ' +
unicode(val))
continue
setattr(mi, x, val)
def download_cover(self, url):
from calibre import browser
from PIL import Image
from cStringIO import StringIO
from calibre.ptempfile import PersistentTemporaryFile
self.log('Downloading cover from %r'%url)
br = browser()
raw = br.open_novisit(url).read()
buf = StringIO(raw)
pt = PersistentTemporaryFile('.jpg')
pt.close()
img = Image.open(buf)
img.convert('RGB').save(pt.name)
return pt.name
def read_user_metadata(self):
'''
@ -638,6 +669,8 @@ OptionRecommendation(name='language',
mi = MetaInformation(opf)
self.opts_to_mi(mi)
if mi.cover:
if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
mi.cover = self.download_cover(mi.cover)
mi.cover_data = ('', open(mi.cover, 'rb').read())
mi.cover = None
self.user_metadata = mi
@ -753,6 +786,7 @@ OptionRecommendation(name='language',
self.oeb = create_oebbook(self.log, self.oeb, self.opts,
self.input_plugin)
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
self.opts.is_image_collection = self.input_plugin.is_image_collection
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
self.flush()
if self.opts.debug_pipeline is not None:


@ -132,6 +132,8 @@ class EPUBInput(InputFormatPlugin):
self.rationalize_cover(opf, log)
self.optimize_opf_parsing = opf
with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())


@ -256,7 +256,20 @@ class EPUBOutput(OutputFormatPlugin):
Perform various markup transforms to get the output to render correctly
in the quirky ADE.
'''
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote
# ADE cries big wet tears when it encounters an invalid fragment
# identifier in the NCX toc.
frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
for node in self.oeb.toc.iter():
href = getattr(node, 'href', None)
if hasattr(href, 'partition'):
base, _, frag = href.partition('#')
frag = urlunquote(frag)
if frag and frag_pat.match(frag) is None:
self.log.warn(
'Removing invalid fragment identifier %r from TOC'%frag)
node.href = base
for x in self.oeb.spine:
root = x.data
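The TOC cleanup added above can be demonstrated standalone. This sketch reuses the same `frag_pat` pattern on invented hrefs, with Python 3's `urllib.parse.unquote` standing in for calibre's `urlunquote`:

```python
import re
from urllib.parse import unquote as urlunquote

# Same pattern as frag_pat above: ADE only tolerates plain
# NCName-like fragment identifiers in the NCX TOC.
frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')

def sanitize_toc_href(href):
    base, _, frag = href.partition('#')
    frag = urlunquote(frag)
    if frag and frag_pat.match(frag) is None:
        return base  # drop the invalid fragment identifier
    return href

print(sanitize_toc_href('ch1.html#sec-1'))      # kept as-is
print(sanitize_toc_href('ch1.html#bad frag!'))  # fragment stripped
```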


@ -111,7 +111,7 @@ class HTMLFile(object):
raise IOError(msg)
raise IgnoreFile(msg, err.errno)
self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
if not self.is_binary:
if encoding is None:
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
@ -408,7 +408,10 @@ class HTMLInput(InputFormatPlugin):
return link_
if base and not os.path.isabs(link):
link = os.path.join(base, link)
link = os.path.abspath(link)
try:
link = os.path.abspath(link)
except:
return link_
if not os.access(link, os.R_OK):
return link_
if os.path.isdir(link):


@ -50,6 +50,7 @@ from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
STREAM_FORCE_COMPRESSED)
from calibre.utils.date import isoformat
DEFAULT_SOURCE_ENCODING = "cp1252" # default is the US Windows (cp1252) character set
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
@ -852,7 +853,7 @@ class DocInfo(object):
self.thumbnail = None
self.language = "en"
self.creator = None
self.creationdate = date.today().isoformat()
self.creationdate = str(isoformat(date.today()))
self.producer = "%s v%s"%(__appname__, __version__)
self.numberofpages = "0"


@ -10,9 +10,11 @@ import os, mimetypes, sys, re
from urllib import unquote, quote
from urlparse import urlparse
from calibre import relpath
from calibre.utils.config import tweaks
from calibre.utils.date import isoformat
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
def string_to_authors(raw):
raw = raw.replace('&&', u'\uffff')
@ -27,6 +29,9 @@ def authors_to_string(authors):
return ''
def author_to_author_sort(author):
method = tweaks['author_sort_copy_method']
if method == 'copy' or (method == 'comma' and author.count(',') > 0):
return author
tokens = author.split()
tokens = tokens[-1:] + tokens[:-1]
if len(tokens) > 1:
@ -340,9 +345,9 @@ class MetaInformation(object):
if self.rating is not None:
fmt('Rating', self.rating)
if self.timestamp is not None:
fmt('Timestamp', self.timestamp.isoformat(' '))
fmt('Timestamp', isoformat(self.timestamp))
if self.pubdate is not None:
fmt('Published', self.pubdate.isoformat(' '))
fmt('Published', isoformat(self.pubdate))
if self.rights is not None:
fmt('Rights', unicode(self.rights))
if self.lccn:


@ -7,12 +7,11 @@ __docformat__ = 'restructuredtext en'
Fetch metadata using Amazon AWS
'''
import sys, re
from datetime import datetime
from lxml import etree
from dateutil import parser
from calibre import browser
from calibre.utils.date import parse_date, utcnow
from calibre.ebooks.metadata import MetaInformation, string_to_authors
AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
@ -44,9 +43,8 @@ def get_social_metadata(title, authors, publisher, isbn):
try:
d = root.findtext('.//'+AWS('PublicationDate'))
if d:
default = datetime.utcnow()
default = datetime(default.year, default.month, 15)
d = parser.parse(d[0].text, default=default)
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
mi.pubdate = d
except:
pass


@ -0,0 +1,65 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from contextlib import closing
from calibre.customize import FileTypePlugin
def is_comic(list_of_names):
extensions = set([x.rpartition('.')[-1].lower() for x in list_of_names])
return len(extensions) == 1 and iter(extensions).next() in ('jpg', 'jpeg', 'png')
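A Python 3 flavoured sketch of the same `is_comic` test (the original uses the Python 2 `iter(...).next()` spelling):

```python
def is_comic(list_of_names):
    # An archive is treated as a comic when every member shares a
    # single image extension (jpg/jpeg/png).
    extensions = {name.rpartition('.')[-1].lower() for name in list_of_names}
    return len(extensions) == 1 and next(iter(extensions)) in ('jpg', 'jpeg', 'png')

print(is_comic(['p001.jpg', 'p002.JPG']))      # True
print(is_comic(['p001.jpg', 'metadata.opf']))  # False
```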
class ArchiveExtract(FileTypePlugin):
name = 'Archive Extract'
author = 'Kovid Goyal'
description = _('Extract common e-book formats from archive '
'(zip/rar) files. Also try to autodetect if they are actually '
'cbz/cbr files.')
file_types = set(['zip', 'rar'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def run(self, archive):
is_rar = archive.lower().endswith('.rar')
if is_rar:
from calibre.libunrar import extract_member, names
else:
from calibre.utils.zipfile import ZipFile
zf = ZipFile(archive, 'r')
if is_rar:
fnames = names(archive)
else:
fnames = zf.namelist()
fnames = [x for x in fnames if '.' in x]
if is_comic(fnames):
ext = '.cbr' if is_rar else '.cbz'
of = self.temporary_file('_archive_extract'+ext)
with open(archive, 'rb') as f:
of.write(f.read())
of.close()
return of.name
if len(fnames) > 1 or not fnames:
return archive
fname = fnames[0]
ext = os.path.splitext(fname)[1][1:]
if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
'mp3', 'pdb', 'azw', 'azw1'):
return archive
of = self.temporary_file('_archive_extract.'+ext)
with closing(of):
if is_rar:
data = extract_member(archive, match=None, name=fname)[1]
of.write(data)
else:
of.write(zf.read(fname))
return of.name


@ -15,6 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
title_sort, MetaInformation
from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre import prints
from calibre.utils.date import parse_date
USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
_('''
@ -69,6 +70,8 @@ def config():
help=_('Set the book producer.'))
c.add_opt('language', ['-l', '--language'],
help=_('Set the language.'))
c.add_opt('pubdate', ['-d', '--date'],
help=_('Set the published date.'))
c.add_opt('get_cover', ['--get-cover'],
help=_('Get the cover from the ebook and save it as the '
@ -132,6 +135,8 @@ def do_set_metadata(opts, mi, stream, stream_type):
mi.series = opts.series.strip()
if getattr(opts, 'series_index', None) is not None:
mi.series_index = float(opts.series_index.strip())
if getattr(opts, 'pubdate', None) is not None:
mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
if getattr(opts, 'cover', None) is not None:
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()


@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files'''
import os
import os, re
from cStringIO import StringIO
from contextlib import closing
@ -29,15 +29,15 @@ class Container(dict):
def __init__(self, stream=None):
if not stream: return
soup = BeautifulStoneSoup(stream.read())
container = soup.find('container')
container = soup.find(name=re.compile(r'container$', re.I))
if not container:
raise OCFException("<container/> element missing")
raise OCFException("<container> element missing")
if container.get('version', None) != '1.0':
raise EPubException("unsupported version of OCF")
rootfiles = container.find('rootfiles')
rootfiles = container.find(re.compile(r'rootfiles$', re.I))
if not rootfiles:
raise EPubException("<rootfiles/> element missing")
for rootfile in rootfiles.findAll('rootfile'):
for rootfile in rootfiles.findAll(re.compile(r'rootfile$', re.I)):
try:
self[rootfile['media-type']] = rootfile['full-path']
except KeyError:
@ -69,7 +69,7 @@ class OCFReader(OCF):
self.opf_path = self.container[OPF.MIMETYPE]
try:
with closing(self.open(self.opf_path)) as f:
self.opf = OPF(f, self.root)
self.opf = OPF(f, self.root, populate_spine=False)
except KeyError:
raise EPubException("missing OPF package file")
@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
def get_cover(opf, opf_path, stream):
from calibre.ebooks import render_html_svg_workaround
from calibre.utils.logging import default_log
spine = list(opf.spine_items())
if not spine:
cpage = opf.first_spine_item()
if not cpage:
return
cpage = spine[0]
with TemporaryDirectory('_epub_meta') as tdir:
with CurrentDir(tdir):
stream.seek(0)


@ -6,14 +6,13 @@ __docformat__ = 'restructuredtext en'
import sys, textwrap
from urllib import urlencode
from functools import partial
from datetime import datetime
from lxml import etree
from dateutil import parser
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
@ -156,9 +155,8 @@ class ResultList(list):
try:
d = date(entry)
if d:
default = datetime.utcnow()
default = datetime(default.year, default.month, 15)
d = parser.parse(d[0].text, default=default)
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
else:
d = None
except:


@ -65,7 +65,22 @@ def _metadata_from_formats(formats):
return mi
def is_recipe(filename):
return filename.startswith('calibre') and \
filename.rpartition('.')[0].endswith('_recipe_out')
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
pos = 0
if hasattr(stream, 'tell'):
pos = stream.tell()
try:
return _get_metadata(stream, stream_type, use_libprs_metadata)
finally:
if hasattr(stream, 'seek'):
stream.seek(pos)
def _get_metadata(stream, stream_type, use_libprs_metadata):
if stream_type: stream_type = stream_type.lower()
if stream_type in ('html', 'htm', 'xhtml', 'xhtm', 'xml'):
stream_type = 'html'
@ -84,11 +99,10 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
return opf
mi = MetaInformation(None, None)
if prefs['read_file_metadata']:
mi = get_file_type_metadata(stream, stream_type)
name = os.path.basename(getattr(stream, 'name', ''))
base = metadata_from_filename(name)
if is_recipe(name) or prefs['read_file_metadata']:
mi = get_file_type_metadata(stream, stream_type)
if base.title == os.path.splitext(name)[0] and base.authors is None:
# Assume that there was no metadata in the file and the user set pattern
# to match meta info from the file name did not match.


@ -11,13 +11,11 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack
from cStringIO import StringIO
from datetime import datetime
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi
import struct
from calibre.utils.date import now as nowf
class StreamSlicer(object):
@ -87,6 +85,8 @@ class StreamSlicer(object):
self._stream.truncate(value)
class MetadataUpdater(object):
DRM_KEY_SIZE = 48
def __init__(self, stream):
self.stream = stream
data = self.data = StreamSlicer(stream)
@ -105,14 +105,32 @@ class MetadataUpdater(object):
have_exth = self.have_exth = (flags & 0x40) != 0
self.cover_record = self.thumbnail_record = None
self.timestamp = None
self.pdbrecords = self.get_pdbrecords()
self.drm_block = None
if self.encryption_type != 0:
if self.have_exth:
self.drm_block = self.fetchDRMdata()
else:
raise MobiError('Unable to set metadata on DRM file without EXTH header')
self.original_exth_records = {}
if not have_exth:
self.create_exth()
self.have_exth = True
# Fetch timestamp, cover_record, thumbnail_record
self.fetchEXTHFields()
def fetchDRMdata(self):
''' Fetch the DRM keys '''
drm_offset = int(unpack('>I', self.record0[0xa8:0xac])[0])
self.drm_key_count = int(unpack('>I', self.record0[0xac:0xb0])[0])
drm_keys = ''
for x in range(self.drm_key_count):
base_addr = drm_offset + (x * self.DRM_KEY_SIZE)
drm_keys += self.record0[base_addr:base_addr + self.DRM_KEY_SIZE]
return drm_keys
def fetchEXTHFields(self):
stream = self.stream
record0 = self.record0
@ -131,14 +149,18 @@ class MetadataUpdater(object):
content = exth[pos + 8: pos + size]
pos += size
self.original_exth_records[id] = content
if id == 106:
self.timestamp = content
elif id == 201:
rindex, = self.cover_rindex, = unpack('>I', content)
self.cover_record = self.record(rindex + image_base)
rindex, = self.cover_rindex, = unpack('>i', content)
if rindex > 0 :
self.cover_record = self.record(rindex + image_base)
elif id == 202:
rindex, = self.thumbnail_rindex, = unpack('>I', content)
self.thumbnail_record = self.record(rindex + image_base)
rindex, = self.thumbnail_rindex, = unpack('>i', content)
if rindex > 0 :
self.thumbnail_record = self.record(rindex + image_base)
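The switch from `'>I'` to `'>i'` above matters because some MOBI producers apparently store 0xFFFFFFFF in the EXTH 201/202 records to mean "no cover/thumbnail". Read unsigned, that sentinel looks like an enormous valid index; read signed, it becomes -1 and is correctly skipped by the `rindex > 0` guard. A sketch on invented bytes:

```python
from struct import unpack

no_cover = b'\xff\xff\xff\xff'   # sentinel some books use for "no cover record"
has_cover = b'\x00\x00\x00\x02'  # record index 2

print(unpack('>I', no_cover)[0])   # 4294967295, looks like a huge valid index
print(unpack('>i', no_cover)[0])   # -1, rejected by the 'rindex > 0' guard
print(unpack('>i', has_cover)[0])  # 2
```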
def patch(self, off, new_record0):
# Save the current size of each record
@ -181,14 +203,15 @@ class MetadataUpdater(object):
off = self.pdbrecords[section][0]
self.patch(off, new)
def create_exth(self, exth=None):
def create_exth(self, new_title=None, exth=None):
# Add an EXTH block to record 0, rewrite the stream
# self.hexdump(self.record0)
if isinstance(new_title, unicode):
new_title = new_title.encode(self.codec, 'replace')
# Fetch the title
title_offset, = struct.unpack('>L', self.record0[0x54:0x58])
title_length, = struct.unpack('>L', self.record0[0x58:0x5c])
title_in_file, = struct.unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
# Fetch the existing title
title_offset, = unpack('>L', self.record0[0x54:0x58])
title_length, = unpack('>L', self.record0[0x58:0x5c])
title_in_file, = unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
# Adjust length to accommodate PrimaryINDX if necessary
mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
@ -207,22 +230,30 @@ class MetadataUpdater(object):
exth = ['EXTH', pack('>II', 12, 0), pad]
exth = ''.join(exth)
# Update title_offset
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
# Update drm_offset(0xa8), title_offset(0x54)
if self.encryption_type != 0:
self.record0[0xa8:0xac] = pack('>L', 0x10 + mobi_header_length + len(exth))
self.record0[0xb0:0xb4] = pack('>L', len(self.drm_block))
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth) + len(self.drm_block))
else:
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
if new_title:
self.record0[0x58:0x5c] = pack('>L', len(new_title))
# Create an updated Record0
new_record0 = StringIO()
new_record0.write(self.record0[:0x10 + mobi_header_length])
new_record0.write(exth)
new_record0.write(title_in_file)
if self.encryption_type != 0:
new_record0.write(self.drm_block)
new_record0.write(new_title if new_title else title_in_file)
# Pad to a 4-byte boundary
trail = len(new_record0.getvalue()) % 4
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
new_record0.write(pad)
#self.hexdump(new_record0.getvalue())
# Rebuild the stream, update the pdbrecords pointers
self.patchSection(0,new_record0.getvalue())
@ -244,7 +275,7 @@ class MetadataUpdater(object):
def get_pdbrecords(self):
pdbrecords = []
for i in xrange(self.nrecs):
offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
offset, a1,a2,a3,a4 = unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
flags, val = a1, a2<<16|a3<<8|a4
pdbrecords.append( [offset, flags, val] )
return pdbrecords
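Each 8-byte entry in the PDB record list is a 4-byte offset, a 1-byte attributes/flags field, and a 3-byte unique ID, which is what the `'>LBBBB'` unpack and the bit-shifts above reassemble. A standalone sketch on a synthetic entry:

```python
from struct import pack, unpack

# Build one synthetic PDB record-list entry: offset 0x1000,
# flags 0, 3-byte unique id 0x000102.
entry = pack('>LBBBB', 0x1000, 0, 0x00, 0x01, 0x02)

offset, a1, a2, a3, a4 = unpack('>LBBBB', entry)
flags, val = a1, a2 << 16 | a3 << 8 | a4

print(hex(offset))  # 0x1000
print(val)          # 258
```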
@ -275,6 +306,10 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop)
def update(self, mi):
def pop_exth_record(exth_id):
if exth_id in self.original_exth_records:
self.original_exth_records.pop(exth_id)
if self.type != "BOOKMOBI":
raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
"\tThis is a '%s' file of type '%s'" % (self.type[0:4], self.type[4:8]))
@ -289,34 +324,49 @@ class MetadataUpdater(object):
if mi.author_sort and pas:
authors = mi.author_sort
recs.append((100, authors.encode(self.codec, 'replace')))
pop_exth_record(100)
elif mi.authors:
authors = '; '.join(mi.authors)
recs.append((100, authors.encode(self.codec, 'replace')))
pop_exth_record(100)
if mi.publisher:
recs.append((101, mi.publisher.encode(self.codec, 'replace')))
pop_exth_record(101)
if mi.comments:
recs.append((103, mi.comments.encode(self.codec, 'replace')))
pop_exth_record(103)
if mi.isbn:
recs.append((104, mi.isbn.encode(self.codec, 'replace')))
pop_exth_record(104)
if mi.tags:
subjects = '; '.join(mi.tags)
recs.append((105, subjects.encode(self.codec, 'replace')))
pop_exth_record(105)
if mi.pubdate:
recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
pop_exth_record(106)
elif mi.timestamp:
recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
pop_exth_record(106)
elif self.timestamp:
recs.append((106, self.timestamp))
pop_exth_record(106)
else:
recs.append((106, str(datetime.now()).encode(self.codec, 'replace')))
recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
pop_exth_record(106)
if self.cover_record is not None:
recs.append((201, pack('>I', self.cover_rindex)))
recs.append((203, pack('>I', 0)))
pop_exth_record(201)
pop_exth_record(203)
if self.thumbnail_record is not None:
recs.append((202, pack('>I', self.thumbnail_rindex)))
pop_exth_record(202)
if getattr(self, 'encryption_type', -1) != 0:
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
# Restore any original EXTH fields that weren't updated
for id in sorted(self.original_exth_records):
recs.append((id, self.original_exth_records[id]))
recs = sorted(recs, key=lambda x: x[0])
exth = StringIO()
for code, data in recs:
@ -332,7 +382,7 @@ class MetadataUpdater(object):
raise MobiError('No existing EXTH record. Cannot update metadata.')
self.record0[92:96] = iana2mobi(mi.language)
self.create_exth(exth)
self.create_exth(exth=exth, new_title=mi.title)
# Fetch updated timestamp, cover_record, thumbnail_record
self.fetchEXTHFields()


@ -12,12 +12,12 @@ from urllib import unquote
from urlparse import urlparse
from lxml import etree
from dateutil import parser
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import __appname__, __version__, filesystem_encoding
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre.utils.date import parse_date, isoformat
class Resource(object):
@ -272,6 +272,10 @@ class Spine(ResourceCollection):
self.id = idfunc(self.path)
self.idref = None
def __repr__(self):
return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % \
(self.path, self.id, self.is_linear)
@staticmethod
def from_opf_spine_element(itemrefs, manifest):
s = Spine(manifest)
@ -280,7 +284,7 @@ class Spine(ResourceCollection):
if idref is not None:
path = s.manifest.path_for_id(idref)
if path:
r = Spine.Item(s.manifest.id_for_path, path, is_path=True)
r = Spine.Item(lambda x:idref, path, is_path=True)
r.is_linear = itemref.get('linear', 'yes') == 'yes'
r.idref = idref
s.append(r)
@ -441,6 +445,8 @@ class OPF(object):
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
title_sort = MetadataField('title_sort', formatter=lambda x:
re.sub(r'\s+', ' ', x), is_dc=False)
publisher = MetadataField('publisher')
language = MetadataField('language')
comments = MetadataField('description')
@ -449,12 +455,14 @@ class OPF(object):
series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
rating = MetadataField('rating', is_dc=False, formatter=int)
pubdate = MetadataField('date', formatter=parser.parse)
pubdate = MetadataField('date', formatter=parse_date)
publication_type = MetadataField('publication_type', is_dc=False)
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
timestamp = MetadataField('timestamp', is_dc=False,
formatter=parse_date)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
populate_spine=True):
if not hasattr(stream, 'read'):
stream = open(stream, 'rb')
raw = stream.read()
@ -477,7 +485,7 @@ class OPF(object):
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
self.spine = None
s = self.spine_path(self.root)
if s:
if populate_spine and s:
self.spine = Spine.from_opf_spine_element(s, self.manifest)
self.guide = None
guide = self.guide_path(self.root)
@ -584,6 +592,15 @@ class OPF(object):
if x.get('id', None) == idref:
yield x.get('href', '')
def first_spine_item(self):
items = self.iterspine()
if not items:
return None
idref = items[0].get('idref', '')
for x in self.itermanifest():
if x.get('id', None) == idref:
return x.get('href', None)
def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t'
@ -675,29 +692,6 @@ class OPF(object):
return property(fget=fget, fset=fset)
@dynamic_property
def title_sort(self):
def fget(self):
matches = self.title_path(self.metadata)
if matches:
for match in matches:
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
if not ans:
ans = match.get('file-as', None)
if ans:
return ans
def fset(self, val):
matches = self.title_path(self.metadata)
if matches:
for key in matches[0].attrib:
if key.endswith('file-as'):
matches[0].attrib.pop(key)
matches[0].set('file-as', unicode(val))
return property(fget=fget, fset=fset)
@dynamic_property
def tags(self):
@ -869,7 +863,8 @@ class OPF(object):
def smart_update(self, mi):
for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'language', 'tags', 'category', 'comments'):
'isbn', 'language', 'tags', 'category', 'comments',
'pubdate'):
val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None):
setattr(self, attr, val)
@ -1041,12 +1036,12 @@ def metadata_to_opf(mi, as_string=True):
elem.text = text.strip()
metadata.append(elem)
factory(DC('title'), mi.title, mi.title_sort)
factory(DC('title'), mi.title)
for au in mi.authors:
factory(DC('creator'), au, mi.author_sort, 'aut')
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
if hasattr(mi.pubdate, 'isoformat'):
factory(DC('date'), mi.pubdate.isoformat())
factory(DC('date'), isoformat(mi.pubdate))
factory(DC('language'), mi.language)
if mi.category:
factory(DC('type'), mi.category)
@ -1069,9 +1064,11 @@ def metadata_to_opf(mi, as_string=True):
if mi.rating is not None:
meta('rating', str(mi.rating))
if hasattr(mi.timestamp, 'isoformat'):
meta('timestamp', mi.timestamp.isoformat())
meta('timestamp', isoformat(mi.timestamp))
if mi.publication_type:
meta('publication_type', mi.publication_type)
if mi.title_sort:
meta('title_sort', mi.title_sort)
metadata[-1].tail = '\n' +(' '*4)
@ -1088,12 +1085,12 @@ def metadata_to_opf(mi, as_string=True):
def test_m2o():
from datetime import datetime
from calibre.utils.date import now as nowf
from cStringIO import StringIO
mi = MetaInformation('test & title', ['a"1', "a'2"])
mi.title_sort = 'a\'"b'
mi.author_sort = 'author sort'
mi.pubdate = datetime.now()
mi.pubdate = nowf()
mi.language = 'en'
mi.category = 'test'
mi.comments = 'what a fun book\n\n'
@ -1103,7 +1100,7 @@ def test_m2o():
mi.series = 's"c\'l&<>'
mi.series_index = 3.34
mi.rating = 3
mi.timestamp = datetime.now()
mi.timestamp = nowf()
mi.publication_type = 'ooooo'
mi.rights = 'yes'
mi.cover = 'asd.jpg'


@ -8,11 +8,15 @@ Read metadata from RAR archives
'''
import os
from cStringIO import StringIO
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
from calibre.libunrar import extract_member, names
from calibre import CurrentDir
def get_metadata(stream):
from calibre.ebooks.metadata.archive import is_comic
from calibre.ebooks.metadata.meta import get_metadata
path = getattr(stream, 'name', False)
if not path:
pt = PersistentTemporaryFile('_rar-meta.rar')
@ -21,16 +25,19 @@ def get_metadata(stream):
path = pt.name
path = os.path.abspath(path)
file_names = list(names(path))
if is_comic(file_names):
return get_metadata(stream, 'cbr')
for f in file_names:
stream_type = os.path.splitext(f)[1].lower()
if stream_type:
stream_type = stream_type[1:]
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
'rb', 'imp', 'pdf', 'lrf'):
data = extract_member(path, match=None, name=f)[1]
stream = StringIO(data)
from calibre.ebooks.metadata.meta import get_metadata
with TemporaryDirectory() as tdir:
with CurrentDir(tdir):
stream = extract_member(path, match=None, name=f,
as_file=True)[1]
return get_metadata(stream, stream_type)
raise ValueError('No ebook found in RAR archive')
raise ValueError('No ebook found in RAR archive')

View File

@ -3,22 +3,31 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from zipfile import ZipFile
from cStringIO import StringIO
from calibre.utils.zipfile import ZipFile
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
def get_metadata(stream):
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.archive import is_comic
stream_type = None
zf = ZipFile(stream, 'r')
for f in zf.namelist():
names = zf.namelist()
if is_comic(names):
# Is probably a comic
return get_metadata(stream, 'cbz')
for f in names:
stream_type = os.path.splitext(f)[1].lower()
if stream_type:
stream_type = stream_type[1:]
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
'rb', 'imp', 'pdf', 'lrf'):
from calibre.ebooks.metadata.meta import get_metadata
stream = StringIO(zf.read(f))
return get_metadata(stream, stream_type)
with TemporaryDirectory() as tdir:
with CurrentDir(tdir):
path = zf.extract(f)
return get_metadata(open(path, 'rb'), stream_type)
raise ValueError('No ebook found in ZIP archive')

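The rewritten loop above extracts the matching archive member to a temporary directory instead of buffering it in memory. The lookup step itself can be sketched standalone (the extension list is copied from the code above; `first_ebook_member` is a hypothetical name):

```python
import os
import zipfile

EBOOK_EXTS = ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
              'rb', 'imp', 'pdf', 'lrf')

def first_ebook_member(path_or_file):
    # Return (member name, type) for the first recognized ebook in the
    # archive, mirroring the namelist() scan in get_metadata above.
    with zipfile.ZipFile(path_or_file) as zf:
        for name in zf.namelist():
            ext = os.path.splitext(name)[1].lower().lstrip('.')
            if ext in EBOOK_EXTS:
                return name, ext
    raise ValueError('No ebook found in ZIP archive')
```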
View File

@ -4,13 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files
'''
import datetime
import functools
import os
import re
import struct
import textwrap
import cStringIO
try:
@ -23,6 +21,7 @@ from lxml import html, etree
from calibre import entity_to_unicode, CurrentDir
from calibre.utils.filenames import ascii_filename
from calibre.utils.date import parse_date
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS
@ -68,7 +67,10 @@ class EXTHHeader(object):
pass
elif id == 503: # Long title
if not title or title == _('Unknown'):
title = content
try:
title = content.decode(codec)
except:
pass
#else:
# print 'unknown record', id, repr(content)
if title:
@ -96,8 +98,7 @@ class EXTHHeader(object):
self.mi.tags = list(set(self.mi.tags))
elif id == 106:
try:
self.mi.publish_date = datetime.datetime.strptime(
content, '%Y-%m-%d', ).date()
self.mi.pubdate = parse_date(content, as_utc=False)
except:
pass
elif id == 108:
@ -795,10 +796,11 @@ class MobiReader(object):
def get_metadata(stream):
from calibre.utils.logging import Log
log = Log()
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
try:
mh = MetadataHeader(stream, log)
if mh.title and mh.title != _('Unknown'):
mi.title = mh.title
if mh.exth is not None:
if mh.exth.mi is not None:
@ -817,10 +819,15 @@ def get_metadata(stream):
else:
data = mh.section_data(mh.first_image_index)
buf = cStringIO.StringIO(data)
im = PILImage.open(buf)
obuf = cStringIO.StringIO()
im.convert('RGBA').save(obuf, format='JPEG')
mi.cover_data = ('jpg', obuf.getvalue())
try:
im = PILImage.open(buf)
except:
log.exception('Failed to read MOBI cover')
else:
obuf = cStringIO.StringIO()
im.convert('RGB').save(obuf, format='JPEG')
mi.cover_data = ('jpg', obuf.getvalue())
except:
log.exception()
log.filter_level = Log.DEBUG
log.exception('Failed to read MOBI metadata')
return mi

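Record 106 parsing above switches from a hard-coded `strptime` format to `parse_date`. A sketch of why that matters — trying several candidate formats instead of one (the format list here is an assumption for illustration, not calibre's actual parser):

```python
from datetime import datetime

DATE_FORMATS = ('%Y-%m-%d', '%Y-%m-%dT%H:%M:%S', '%d %b %Y')

def parse_date(value):
    # EXTH record 106 dates come in several shapes; try each known
    # format instead of failing on anything but '%Y-%m-%d'.
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    raise ValueError('Unrecognized date: %r' % (value,))
```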
View File

@ -310,6 +310,7 @@ class Serializer(object):
text = text.replace('&', '&amp;')
text = text.replace('<', '&lt;')
text = text.replace('>', '&gt;')
text = text.replace(u'\u00AD', '') # Soft-hyphen
if quot:
text = text.replace('"', '&quot;')
self.buffer.write(encode(text))
@ -610,12 +611,21 @@ class MobiWriter(object):
if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
if offset != previousOffset + previousLength :
self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
self._oeb.log.info("  node %03d: '%s' offset: 0x%X length: 0x%X" % \
(i-1, entries[i-1].title, previousOffset, previousLength) )
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
(i, child.title, offset, previousOffset + previousLength) )
self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
# self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
# self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
# Dump the offending entry
self._oeb.log.info("...")
for z in range(max(i-6, 0), min(i+6, len(entries))):
if z == i:
self._oeb.log.warning("child %03d: %s" % (z, entries[z]))
else:
self._oeb.log.info("child %03d: %s" % (z, entries[z]))
self._oeb.log.info("...")
self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
# Zero out self._HTMLRecords, return False
self._HTMLRecords = []
@ -1366,7 +1376,7 @@ class MobiWriter(object):
self._text_length,
self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff)
title = str(metadata.title[0])
title = unicode(metadata.title[0]).encode('utf-8')
# The MOBI Header
# 0x0 - 0x3

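The serializer change above drops U+00AD because the Kindle renders it literally instead of treating it as a discretionary break. The operation itself is a one-liner:

```python
SOFT_HYPHEN = '\u00ad'

def strip_soft_hyphens(text):
    # The Kindle shows U+00AD as a visible glyph rather than a
    # conditional line-break hint, so MOBI output removes it outright.
    return text.replace(SOFT_HYPHEN, '')
```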
View File

@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
from urlparse import urljoin
from lxml import etree, html
from cssutils import CSSParser
import calibre
from cssutils import CSSParser
from calibre.constants import filesystem_encoding
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -434,10 +435,18 @@ class DirContainer(object):
def namelist(self):
names = []
for root, dirs, files in os.walk(self.rootdir):
base = self.rootdir
if isinstance(base, unicode):
base = base.encode(filesystem_encoding)
for root, dirs, files in os.walk(base):
for fname in files:
fname = os.path.join(root, fname)
fname = fname.replace('\\', '/')
if not isinstance(fname, unicode):
try:
fname = fname.decode(filesystem_encoding)
except:
continue
names.append(fname)
return names
@ -842,8 +851,10 @@ class Manifest(object):
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
nroot = etree.fromstring('<html><body/></html>')
parent = nroot[0]
for child in list(data):
child.getparent().remove(child)
for child in list(data.iter()):
oparent = child.getparent()
if oparent is not None:
oparent.remove(child)
parent.append(child)
data = nroot
@ -1567,14 +1578,17 @@ class TOC(object):
parent = etree.Element(NCX('navMap'))
for node in self.nodes:
id = node.id or unicode(uuid.uuid4())
attrib = {'id': id, 'playOrder': str(node.play_order)}
po = node.play_order
if po == 0:
po = 1
attrib = {'id': id, 'playOrder': str(po)}
if node.klass:
attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib)
label = etree.SubElement(point, NCX('navLabel'))
title = node.title
if title:
title = re.sub(r'\s', ' ', title)
title = re.sub(r'\s+', ' ', title)
element(label, NCX('text')).text = title
element(point, NCX('content'), src=urlunquote(node.href))
node.to_ncx(point)

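Two small fixes appear in the TOC writer above: `playOrder` is clamped to at least 1 (Adobe Digital Editions rejects 0), and whitespace runs in titles collapse via `\s+` rather than replacing each whitespace character with its own space. Sketched together (hypothetical helper name):

```python
import re

def ncx_navpoint_attrs(title, play_order):
    # playOrder values start at 1 in NCX; \s+ collapses a whole run of
    # whitespace into a single space in one substitution.
    return re.sub(r'\s+', ' ', title), max(play_order, 1)
```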
View File

@ -120,7 +120,10 @@ class EbookIterator(object):
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)
@ -181,8 +184,9 @@ class EbookIterator(object):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language
if self.language:
self.language = self.language.lower()

View File

@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from datetime import datetime
from calibre.utils.date import isoformat, now
def meta_info_to_oeb_metadata(mi, m, log):
from calibre.ebooks.oeb.base import OPF
@ -60,10 +60,10 @@ def meta_info_to_oeb_metadata(mi, m, log):
m.add('subject', t)
if mi.pubdate is not None:
m.clear('date')
m.add('date', mi.pubdate.isoformat())
m.add('date', isoformat(mi.pubdate))
if mi.timestamp is not None:
m.clear('timestamp')
m.add('timestamp', mi.timestamp.isoformat())
m.add('timestamp', isoformat(mi.timestamp))
if mi.rights is not None:
m.clear('rights')
m.add('rights', mi.rights)
@ -71,7 +71,7 @@ def meta_info_to_oeb_metadata(mi, m, log):
m.clear('publication_type')
m.add('publication_type', mi.publication_type)
if not m.timestamp:
m.add('timestamp', datetime.now().isoformat())
m.add('timestamp', isoformat(now()))
class MergeMetadata(object):

View File

@ -29,6 +29,9 @@ class RescaleImages(object):
page_width, page_height = self.opts.dest.width, self.opts.dest.height
if not getattr(self.opts, 'is_image_collection', False):
page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
for item in self.oeb.manifest:
if item.media_type.startswith('image'):
raw = item.data
@ -53,7 +56,8 @@ class RescaleImages(object):
scaled, new_width, new_height = fit_image(width, height,
page_width, page_height)
if scaled:
self.log('Rescaling image', item.href)
self.log('Rescaling image from %dx%d to %dx%d'%(
width, height, new_width, new_height), item.href)
if qt:
img = img.scaled(new_width, new_height,
Qt.IgnoreAspectRatio, Qt.SmoothTransformation)

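The rescale fix above subtracts the page margins (given in points) converted to device pixels before fitting images, which is what keeps comics from being cut off at the right edge. The arithmetic, as a standalone sketch:

```python
def effective_page_size(width, height, margins, dpi):
    # margins are (left, top, right, bottom) in points; one point is
    # 1/72 inch, so points * dpi / 72 gives pixels at the device DPI.
    left, top, right, bottom = margins
    return (width - (left + right) * dpi / 72.0,
            height - (top + bottom) * dpi / 72.0)
```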
View File

@ -169,6 +169,8 @@ int main(int argc, char **argv) {
char *memblock;
ifstream::pos_type size;
int ret = 0;
map<string,string> info;
Reflow *reflow = NULL;
if (argc != 2) {
@ -189,9 +191,13 @@ int main(int argc, char **argv) {
}
try {
Reflow reflow(memblock, size);
reflow.render();
vector<char> *data = reflow.render_first_page();
reflow = new Reflow(memblock, size);
info = reflow->get_info();
for (map<string,string>::const_iterator it = info.begin() ; it != info.end(); it++ ) {
cout << (*it).first << " : " << (*it).second << endl;
}
//reflow->render();
vector<char> *data = reflow->render_first_page();
ofstream file("cover.png", ios::binary);
file.write(&((*data)[0]), data->size());
delete data;
@ -200,7 +206,7 @@ int main(int argc, char **argv) {
cerr << e.what() << endl;
ret = 1;
}
delete reflow;
delete[] memblock;
return ret;
}

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
import sys, os
from lxml import etree
@ -47,6 +47,10 @@ class Image(Element):
return '<img src="%s" width="%dpx" height="%dpx"/>' % \
(self.src, int(self.width), int(self.height))
def dump(self, f):
f.write(self.to_html())
f.write('\n')
class Text(Element):
@ -91,6 +95,10 @@ class Text(Element):
def to_html(self):
return self.raw
def dump(self, f):
f.write(self.to_html().encode('utf-8'))
f.write('\n')
class FontSizeStats(dict):
def __init__(self, stats):
@ -143,6 +151,14 @@ class Column(object):
def add(self, elem):
if elem in self.elements: return
self.elements.append(elem)
self._post_add()
def prepend(self, elem):
if elem in self.elements: return
self.elements.insert(0, elem)
self._post_add()
def _post_add(self):
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
self.top = self.elements[0].top
self.bottom = self.elements[-1].bottom
@ -183,6 +199,11 @@ class Column(object):
return None
return self.elements[idx-1]
def dump(self, f, num):
f.write('******** Column %d\n\n'%num)
for elem in self.elements:
elem.dump(f)
class Box(list):
@ -282,7 +303,6 @@ class Region(object):
mc = self.columns[0]
return mc
print
for c in singleton.columns:
for elem in c:
col = most_suitable_column(elem)
@ -303,6 +323,51 @@ class Region(object):
for x in self.columns:
yield x
def absorb_regions(self, regions, at):
for region in regions:
self.absorb_region(region, at)
def absorb_region(self, region, at):
if len(region.columns) <= len(self.columns):
for i in range(len(region.columns)):
src, dest = region.columns[i], self.columns[i]
if at != 'bottom':
src = reversed(list(iter(src)))
for elem in src:
func = dest.add if at == 'bottom' else dest.prepend
func(elem)
else:
col_map = {}
for i, col in enumerate(region.columns):
max_overlap, max_overlap_index = 0, 0
for j, dcol in enumerate(self.columns):
sint = Interval(col.left, col.right)
dint = Interval(dcol.left, dcol.right)
width = sint.intersection(dint).width
if width > max_overlap:
max_overlap = width
max_overlap_index = j
col_map[i] = max_overlap_index
lines = max(map(len, region.columns))
if at == 'bottom':
lines = range(lines)
else:
lines = range(lines-1, -1, -1)
for i in lines:
for j, src in enumerate(region.columns):
dest = self.columns[col_map[j]]
if i < len(src):
func = dest.add if at == 'bottom' else dest.prepend
func(src.elements[i])
def dump(self, f):
f.write('############################################################\n')
f.write('########## Region (%d columns) ###############\n'%len(self.columns))
f.write('############################################################\n\n')
for i, col in enumerate(self.columns):
col.dump(f, i)
def linearize(self):
self.elements = []
for x in self.columns:
@ -375,7 +440,8 @@ class Page(object):
self.font_size_stats[t.font_size] = 0
self.font_size_stats[t.font_size] += len(t.text_as_string)
self.average_text_height += t.height
self.average_text_height /= len(self.texts)
if len(self.texts):
self.average_text_height /= len(self.texts)
self.font_size_stats = FontSizeStats(self.font_size_stats)
@ -430,7 +496,20 @@ class Page(object):
if not current_region.is_empty:
self.regions.append(current_region)
if self.opts.verbose > 2:
self.debug_dir = 'page-%d'%self.number
os.mkdir(self.debug_dir)
self.dump_regions('pre-coalesce')
self.coalesce_regions()
self.dump_regions('post-coalesce')
def dump_regions(self, fname):
fname = 'regions-'+fname+'.txt'
with open(os.path.join(self.debug_dir, fname), 'wb') as f:
f.write('Page #%d\n\n'%self.number)
for region in self.regions:
region.dump(f)
def coalesce_regions(self):
# find contiguous sets of small regions
@ -439,47 +518,57 @@ class Page(object):
# region)
found = True
absorbed = set([])
processed = set([])
while found:
found = False
for i, region in enumerate(self.regions):
if region.is_small:
if region in absorbed:
continue
if region.is_small and region not in processed:
found = True
regions = []
processed.add(region)
regions = [region]
end = i+1
for j in range(i+1, len(self.regions)):
end = j
if self.regions[j].is_small:
regions.append(self.regions[j])
else:
break
prev_region = None if i == 0 else i-1
next_region = j if self.regions[j] not in regions else None
next_region = end if end < len(self.regions) and self.regions[end] not in regions else None
absorb_at = 'bottom'
if prev_region is None and next_region is not None:
absorb_into = next_region
absorb_at = 'top'
elif next_region is None and prev_region is not None:
absorb_into = prev_region
elif prev_region is None and next_region is None:
if len(regions) > 1:
absorb_into = regions[0]
absorb_into = i
regions = regions[1:]
else:
absorb_into = None
else:
absorb_into = prev_region
if next_region.line_count >= prev_region.line_count:
if self.regions[next_region].line_count >= \
self.regions[prev_region].line_count:
avg_column_count = sum([len(r.columns) for r in
regions])/float(len(regions))
if next_region.line_count > prev_region.line_count \
or abs(avg_column_count - len(prev_region.columns)) \
> abs(avg_column_count - len(next_region.columns)):
if self.regions[next_region].line_count > \
self.regions[prev_region].line_count \
or abs(avg_column_count -
len(self.regions[prev_region].columns)) \
> abs(avg_column_count -
len(self.regions[next_region].columns)):
absorb_into = next_region
absorb_at = 'top'
if absorb_into is not None:
absorb_into.absorb_region(regions)
self.regions[absorb_into].absorb_regions(regions, absorb_at)
absorbed.update(regions)
i = j
for region in absorbed:
self.regions.remove(region)
def sort_into_columns(self, elem, neighbors):
neighbors.add(elem)
neighbors = sorted(neighbors, cmp=lambda x,y:cmp(x.left, y.left))
@ -598,8 +687,9 @@ class PDFDocument(object):
for elem in self.elements:
html.extend(elem.to_html())
html += ['</body>', '</html>']
raw = (u'\n'.join(html)).replace('</strong><strong>', '')
with open('index.html', 'wb') as f:
f.write((u'\n'.join(html)).encode('utf-8'))
f.write(raw.encode('utf-8'))

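When `absorb_region` above has a different column count than the region being absorbed, it maps each source column to the destination column with the greatest horizontal overlap. The interval arithmetic can be sketched on plain (left, right) tuples:

```python
def overlap_width(a, b):
    # Width of the intersection of two (left, right) intervals;
    # zero when they are disjoint.
    return max(0, min(a[1], b[1]) - max(a[0], b[0]))

def map_columns(src, dst):
    # For each source column pick the destination column it overlaps
    # most, as the col_map loop in absorb_region does.
    mapping = {}
    for i, s in enumerate(src):
        widths = [overlap_width(s, d) for d in dst]
        mapping[i] = widths.index(max(widths))
    return mapping
```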
View File

@ -182,10 +182,10 @@ class PML_HTMLizer(object):
return pml
def strip_pml(self, pml):
pml = re.sub(r'\\C\d=".+*"', '', pml)
pml = re.sub(r'\\Fn=".+*"', '', pml)
pml = re.sub(r'\\Sd=".+*"', '', pml)
pml = re.sub(r'\\.=".+*"', '', pml)
pml = re.sub(r'\\C\d=".*"', '', pml)
pml = re.sub(r'\\Fn=".*"', '', pml)
pml = re.sub(r'\\Sd=".*"', '', pml)
pml = re.sub(r'\\.=".*"', '', pml)
pml = re.sub(r'\\X\d', '', pml)
pml = re.sub(r'\\S[pbd]', '', pml)
pml = re.sub(r'\\Fn', '', pml)

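The `strip_pml` patterns above originally used `.+*`, which Python's `re` module rejects as a "multiple repeat" error; `.*` matches the quoted argument as intended. The corrected substitutions:

```python
import re

def strip_pml_codes(pml):
    # Remove \Cn="...", \Fn="...", \Sd="..." and any other \?="..."
    # codes; the greedy .* runs to the closing quote of each argument.
    for pat in (r'\\C\d=".*"', r'\\Fn=".*"', r'\\Sd=".*"', r'\\.=".*"'):
        pml = re.sub(pat, '', pml)
    return pml
```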
View File

@ -131,9 +131,9 @@ class RtfTokenParser():
if isString(self.tokens[i].name, "\\'"):
i = i + 1
if not isinstance(self.tokens[i], tokenData):
raise BaseException('Error: token8bitChar without data.')
raise Exception('Error: token8bitChar without data.')
if len(self.tokens[i].data) < 2:
raise BaseException('Error: token8bitChar without data.')
raise Exception('Error: token8bitChar without data.')
newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
if len(self.tokens[i].data) > 2:
newTokens.append(tokenData(self.tokens[i].data[2:]))
@ -195,7 +195,7 @@ class RtfTokenParser():
i = i + 1
j = j + 1
continue
raise BaseException('Error: incorect utf replacement.')
raise Exception('Error: incorrect utf replacement.')
#calibre rtf2xml does not support utfreplace
replace = []
@ -248,7 +248,7 @@ class RtfTokenizer():
if isChar(self.rtfData[i], '\\'):
if i + 1 >= len(self.rtfData):
raise BaseException('Error: Control character found at the end of the document.')
raise Exception('Error: Control character found at the end of the document.')
if lastDataStart > -1:
self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@ -269,7 +269,7 @@ class RtfTokenizer():
i = i + 1
if not consumed:
raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))
#we have numeric argument before delimiter
if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
@ -283,10 +283,10 @@ class RtfTokenizer():
l = l + 1
i = i + 1
if l > 10 :
raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%(tokenStart))
if not consumed:
raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
raise Exception('Error (at:%d): Control Word without numeric argument end.'%(tokenStart))
separator = ''
if isChar(self.rtfData[i], ' '):

View File

@ -27,7 +27,7 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \
paragraph_def, convert_to_tags, output, copy, \
list_numbers, info, pict, table_info, fonts, paragraphs, \
body_styles, preamble_rest, group_styles, \
inline, correct_unicode
inline
from calibre.ebooks.rtf2xml.old_rtf import OldRtf
"""
@ -256,15 +256,6 @@ class ParseRtf:
)
pict_obj.process_pict()
self.__bracket_match('pict_data_info')
correct_uni_obj = correct_unicode.CorrectUnicode(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
copy = self.__copy,
run_level = self.__run_level,
exception_handler = InvalidRtfException,
)
correct_uni_obj.correct_unicode()
self.__bracket_match('correct_unicode_info')
combine_obj = combine_borders.CombineBorders(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,

View File

@ -1,94 +0,0 @@
#########################################################################
# #
# #
# copyright 2002 Paul Henry Tremblay #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
# 02111-1307 USA #
# #
# #
#########################################################################
import os, re, tempfile
from calibre.ebooks.rtf2xml import copy
class CorrectUnicode:
"""
corrects sequences such as \u201c\'F0\'BE
Where \'F0\'BE has to be eliminated.
"""
def __init__(self,
in_file,
exception_handler,
bug_handler,
copy = None,
run_level = 1,
):
self.__file = in_file
self.__bug_handler = bug_handler
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
self.__exception_handler = exception_handler
self.__bug_handler = bug_handler
self.__state = 'outside'
self.__utf_exp = re.compile(r'&#x(.*?);')
def __process_token(self, line):
if self.__state == 'outside':
if line[:5] == 'tx<ut':
self.__handle_unicode(line)
else:
self.__write_obj.write(line)
elif self.__state == 'after':
if line[:5] == 'tx<hx':
pass
elif line[:5] == 'tx<ut':
self.__handle_unicode(line)
else:
self.__state = 'outside'
self.__write_obj.write(line)
else:
raise 'should\'t happen'
def __handle_unicode(self, line):
token = line[16:]
match_obj = re.search(self.__utf_exp, token)
if match_obj:
uni_char = match_obj.group(1)
dec_num = int(uni_char, 16)
if dec_num > 57343 and dec_num < 63743:
self.__state = 'outside'
else:
self.__write_obj.write(line)
self.__state = 'after'
else:
self.__write_obj.write(line)
self.__state = 'outside'
def correct_unicode(self):
"""
Requires:
nothing
Returns:
nothing (changes the original file)
Logic:
Read one line in at a time.
"""
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
self.__process_token(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "correct_unicode.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)

View File

@ -72,7 +72,7 @@ class Tokenize:
return line
def __compile_expressions(self):
self.__ms_hex_exp = re.compile(r"\\\'(..)")
self.__utf_exp = re.compile(r"\\u(-?\d{3,6})")
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
self.__par_exp = re.compile(r'\\$')
self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")

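The tokenizer change above makes the `\uN` pattern consume one optional trailing space: in RTF, a single space after a control word is a delimiter that belongs to the control word, not to the text. A sketch of the substitution (the `& 0xFFFF` wrap for negative code points is an assumption about handling RTF's signed 16-bit values, not copied from this code):

```python
import re

# \uN is followed by an optional delimiting space that is part of the
# control word, hence the trailing ' ?' (equivalent to ' {0,1}').
UTF_EXP = re.compile(r'\\u(-?\d{3,6}) ?')

def decode_rtf_unicode(raw):
    # RTF stores code points as signed 16-bit integers; mask to get
    # the unsigned value before converting to a character.
    return UTF_EXP.sub(lambda m: chr(int(m.group(1)) & 0xFFFF), raw)
```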
View File

@ -2,9 +2,11 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" The GUI """
import os
from threading import RLock
from PyQt4.QtCore import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, QSize, \
QByteArray, QTranslator, QCoreApplication, QThread, \
QEvent
QEvent, QTimer, pyqtSignal
from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
QIcon, QTableView, QApplication, QDialog, QPushButton
@ -234,16 +236,17 @@ def human_readable(size):
return size + " " + suffix
class Dispatcher(QObject):
'''Convenience class to ensure that a function call always happens in the GUI thread'''
SIGNAL = SIGNAL('dispatcher(PyQt_PyObject,PyQt_PyObject)')
'''Convenience class to ensure that a function call always happens in the
thread the receiver was created in.'''
dispatch_signal = pyqtSignal(object, object)
def __init__(self, func):
QObject.__init__(self)
self.func = func
self.connect(self, self.SIGNAL, self.dispatch, Qt.QueuedConnection)
self.dispatch_signal.connect(self.dispatch, type=Qt.QueuedConnection)
def __call__(self, *args, **kwargs):
self.emit(self.SIGNAL, args, kwargs)
self.dispatch_signal.emit(args, kwargs)
def dispatch(self, args, kwargs):
self.func(*args, **kwargs)
@ -533,6 +536,8 @@ class Application(QApplication):
self._translator = None
self.load_translations()
qt_app = self
self._file_open_paths = []
self._file_open_lock = RLock()
if islinux:
self.setStyleSheet('''
@ -545,6 +550,12 @@ class Application(QApplication):
}
''')
def _send_file_open_events(self):
with self._file_open_lock:
if self._file_open_paths:
self.file_event_hook(self._file_open_paths)
self._file_open_paths = []
def load_translations(self):
if self._translator is not None:
@ -557,7 +568,9 @@ class Application(QApplication):
if callable(self.file_event_hook) and e.type() == QEvent.FileOpen:
path = unicode(e.file())
if os.access(path, os.R_OK):
self.file_event_hook(path)
with self._file_open_lock:
self._file_open_paths.append(path)
QTimer.singleShot(1000, self._send_file_open_events)
return True
else:
return QApplication.event(self, e)

View File

@ -23,7 +23,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),
plumber.run()
def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, connected_device,
notification=DummyReporter(), log=None):
if log is None:
log = Log()
@ -44,6 +44,7 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
# Populate opts
# opts.gui_search_text = something
opts.catalog_title = title
opts.connected_device = connected_device
opts.ids = ids
opts.search_text = None
opts.sort_by = None

View File

@ -115,6 +115,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
<item row="1" column="0">
@ -135,6 +138,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
<item row="2" column="0">
@ -155,6 +161,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
<item row="3" column="0">
@ -175,6 +184,9 @@
<property name="decimals">
<number>1</number>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
</widget>
</item>
</layout>

View File

@ -12,7 +12,8 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
QDialog.__init__(self, window)
Ui_ChooseFormatDialog.__init__(self)
self.setupUi(self)
self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept())
self.connect(self.formats, SIGNAL('activated(QModelIndex)'),
self.activated_slot)
self.msg.setText(msg)
for format in formats:
@ -20,6 +21,15 @@ class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
format.upper()))
self._formats = formats
self.formats.setCurrentRow(0)
self._format = None
def activated_slot(self, *args):
self.accept()
def format(self):
return self._formats[self.formats.currentRow()]
return self._format
def accept(self):
self._format = self._formats[self.formats.currentRow()]
return QDialog.accept(self)

View File

@ -174,7 +174,7 @@
<item>
<widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
<property name="text">
<string>Overwrite &amp; author/title by default when fetching metadata</string>
<string>&amp;Overwrite author and title by default when fetching metadata</string>
</property>
</widget>
</item>
@ -612,7 +612,7 @@
<item row="0" column="0" colspan="2">
<widget class="QLabel" name="label_22">
<property name="text">
<string>calibre can send your books to you (or your reader) by email</string>
<string>calibre can send your books to you (or your reader) by email. Emails will be automatically sent for downloaded news to all email addresses that have Auto-send checked.</string>
</property>
<property name="wordWrap">
<bool>true</bool>

View File

@ -119,7 +119,7 @@
<item>
<widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
<property name="text">
<string>Overwrite &amp;author/title with author/title of selected book</string>
<string>Overwrite author and title with author and title of selected book</string>
</property>
</widget>
</item>

View File

@ -10,7 +10,6 @@ import os
import re
import time
import traceback
from datetime import datetime, timedelta
from PyQt4.Qt import SIGNAL, QObject, QCoreApplication, Qt, QTimer, QThread, QDate, \
QPixmap, QListWidgetItem, QDialog
@ -28,7 +27,8 @@ from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, \
from calibre.ebooks.metadata.library_thing import cover_from_isbn
from calibre import islinux
from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.config import prefs
from calibre.utils.config import prefs, tweaks
from calibre.utils.date import qt_to_dt
from calibre.customize.ui import run_plugins_on_import, get_isbndb_key
from calibre.gui2.dialogs.config.social import SocialMetadata
@ -354,12 +354,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.comments.setPlainText(comments if comments else '')
cover = self.db.cover(row)
pubdate = db.pubdate(self.id, index_is_id=True)
self.local_timezone_offset = timedelta(seconds=time.timezone) - timedelta(hours=time.daylight)
pubdate = pubdate - self.local_timezone_offset
self.pubdate.setDate(QDate(pubdate.year, pubdate.month,
pubdate.day))
timestamp = db.timestamp(self.id, index_is_id=True)
timestamp = timestamp - self.local_timezone_offset
self.orig_timestamp = timestamp
self.date.setDate(QDate(timestamp.year, timestamp.month,
timestamp.day))
@ -399,6 +397,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if not pm.isNull():
self.cover.setPixmap(pm)
self.cover_data = cover
self.original_series_name = unicode(self.series.text()).strip()
def validate_isbn(self, isbn):
isbn = unicode(isbn).strip()
@ -582,7 +581,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if book.isbn: self.isbn.setText(book.isbn)
if book.pubdate:
d = book.pubdate
d = d - self.local_timezone_offset
self.pubdate.setDate(QDate(d.year, d.month, d.day))
summ = book.comments
if summ:
@ -610,10 +608,13 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
def increment_series_index(self):
if self.db is not None:
try:
series = unicode(self.series.text())
if series:
ns = self.db.get_next_series_num_for(series)
series = unicode(self.series.text()).strip()
if series and series != self.original_series_name:
ns = 1
if tweaks['series_index_auto_increment'] == 'next':
ns = self.db.get_next_series_num_for(series)
self.series_index.setValue(ns)
self.original_series_name = series
except:
traceback.print_exc()
@ -645,18 +646,19 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
re.sub(r'[^0-9a-zA-Z]', '', unicode(self.isbn.text())), notify=False)
self.db.set_rating(self.id, 2*self.rating.value(), notify=False)
self.db.set_publisher(self.id, qstring_to_unicode(self.publisher.currentText()), notify=False)
self.db.set_tags(self.id, qstring_to_unicode(self.tags.text()).split(','), notify=False)
self.db.set_series(self.id, qstring_to_unicode(self.series.currentText()), notify=False)
self.db.set_tags(self.id, [x.strip() for x in
unicode(self.tags.text()).split(',')], notify=False)
self.db.set_series(self.id,
unicode(self.series.currentText()).strip(), notify=False)
self.db.set_series_index(self.id, self.series_index.value(), notify=False)
self.db.set_comment(self.id, qstring_to_unicode(self.comments.toPlainText()), notify=False)
d = self.pubdate.date()
d = datetime(d.year(), d.month(), d.day())
d = d + self.local_timezone_offset
d = qt_to_dt(d)
self.db.set_pubdate(self.id, d)
d = self.date.date()
d = datetime(d.year(), d.month(), d.day())
d = d + self.local_timezone_offset
self.db.set_timestamp(self.id, d)
d = qt_to_dt(d)
if d.date() != self.orig_timestamp.date():
self.db.set_timestamp(self.id, d)
if self.cover_changed:
if self.cover_data is not None:


@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
Scheduler for automated recipe downloads
'''
from datetime import datetime, timedelta
from datetime import timedelta
from PyQt4.Qt import QDialog, SIGNAL, Qt, QTime, QObject, QMenu, \
QAction, QIcon, QMutex, QTimer
@ -17,6 +17,7 @@ from calibre.gui2.search_box import SearchBox2
from calibre.gui2 import config as gconf, error_dialog
from calibre.web.feeds.recipes.model import RecipeModel
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import utcnow
class SchedulerDialog(QDialog, Ui_Dialog):
@ -185,7 +186,7 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.day.setCurrentIndex(day+1)
self.time.setTime(QTime(hour, minute))
d = datetime.utcnow() - last_downloaded
d = utcnow() - last_downloaded
def hm(x): return (x-x%3600)//3600, (x%3600 - (x%3600)%60)//60
hours, minutes = hm(d.seconds)
tm = _('%d days, %d hours and %d minutes ago')%(d.days, hours, minutes)


@ -5,21 +5,30 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.search_ui import Ui_Dialog
from calibre.gui2 import qstring_to_unicode
from calibre.library.database2 import CONTAINS_MATCH, EQUALS_MATCH
class SearchDialog(QDialog, Ui_Dialog):
def __init__(self, *args):
QDialog.__init__(self, *args)
self.setupUi(self)
self.mc = ''
def tokens(self, raw):
phrases = re.findall(r'\s+".*?"\s+', raw)
phrases = re.findall(r'\s*".*?"\s*', raw)
for f in phrases:
raw = raw.replace(f, ' ')
return [t.strip() for t in phrases + raw.split()]
phrases = [t.strip('" ') for t in phrases]
return ['"' + self.mc + t + '"' for t in phrases + [r.strip() for r in raw.split()]]
def search_string(self):
mk = self.matchkind.currentIndex()
if mk == CONTAINS_MATCH:
self.mc = ''
elif mk == EQUALS_MATCH:
self.mc = '='
else:
self.mc = '~'
all, any, phrase, none = map(lambda x: unicode(x.text()),
(self.all, self.any, self.phrase, self.none))
all, any, none = map(self.tokens, (all, any, none))


@ -104,7 +104,64 @@
</widget>
</item>
<item>
<widget class="QLabel" name="label_5" >
<widget class="QGroupBox" name="groupBox" >
<property name="maximumSize" >
<size>
<width>16777215</width>
<height>60</height>
</size>
</property>
<layout class="QHBoxLayout" name="horizontalLayout_5" >
<item>
<widget class="QLabel" name="label_5" >
<property name="text" >
<string>What kind of match to use:</string>
</property>
<property name="buddy" >
<cstring>matchkind</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="matchkind">
<item>
<property name="text">
<string>Contains: the word or phrase matches anywhere in the metadata</string>
</property>
</item>
<item>
<property name="text">
<string>Equals: the word or phrase must match an entire metadata field</string>
</property>
</item>
<item>
<property name="text">
<string>Regular expression: the expression must match anywhere in the metadata</string>
</property>
</item>
</widget>
</item>
<item>
<widget class="QLabel" name="label_51" >
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>40</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text" >
<string> </string>
</property>
<property name="buddy" >
<cstring>matchkind</cstring>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QLabel" name="label_6" >
<property name="maximumSize" >
<size>
<width>16777215</width>


@ -1,11 +1,11 @@
from calibre.ebooks.metadata import authors_to_string
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, textwrap, traceback, time, re
from datetime import timedelta, datetime
from operator import attrgetter
import os, textwrap, traceback, re, shutil
from operator import attrgetter
from math import cos, sin, pi
from contextlib import closing
from PyQt4.QtGui import QTableView, QAbstractItemView, QColor, \
QItemDelegate, QPainterPath, QLinearGradient, QBrush, \
QPen, QStyle, QPainter, \
@ -17,13 +17,16 @@ from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, pyqtSignal, \
from calibre import strftime
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.pyparsing import ParseException
from calibre.library.database2 import FIELD_MAP
from calibre.library.database2 import FIELD_MAP, _match, CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog
from calibre.gui2.widgets import EnLineEdit, TagsLineEdit
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.ebooks.metadata import string_to_authors, fmt_sidx
from calibre.ebooks.metadata import string_to_authors, fmt_sidx, \
authors_to_string
from calibre.utils.config import tweaks
from calibre.utils.date import dt_factory, qt_to_dt, isoformat
class LibraryDelegate(QItemDelegate):
COLOR = QColor("blue")
@ -97,7 +100,10 @@ class DateDelegate(QStyledItemDelegate):
def createEditor(self, parent, option, index):
qde = QStyledItemDelegate.createEditor(self, parent, option, index)
qde.setDisplayFormat(unicode(qde.displayFormat()).replace('yy', 'yyyy'))
stdformat = unicode(qde.displayFormat())
if 'yyyy' not in stdformat:
stdformat = stdformat.replace('yy', 'yyyy')
qde.setDisplayFormat(stdformat)
qde.setMinimumDate(QDate(101,1,1))
qde.setCalendarPopup(True)
return qde
@ -465,8 +471,11 @@ class BooksModel(QAbstractTableModel):
break
if format is not None:
pt = PersistentTemporaryFile(suffix='.'+format)
pt.write(self.db.format(id, format, index_is_id=True))
pt.flush()
with closing(self.db.format(id, format, index_is_id=True,
as_file=True)) as src:
shutil.copyfileobj(src, pt)
pt.flush()
pt.seek(0)
if set_metadata:
_set_metadata(pt, self.db.get_metadata(id, get_cover=True, index_is_id=True),
format)
@ -499,8 +508,10 @@ class BooksModel(QAbstractTableModel):
break
if format is not None:
pt = PersistentTemporaryFile(suffix='.'+format)
pt.write(self.db.format(row, format))
pt.flush()
with closing(self.db.format(row, format, as_file=True)) as src:
shutil.copyfileobj(src, pt)
pt.flush()
pt.seek(0)
if set_metadata:
_set_metadata(pt, self.db.get_metadata(row, get_cover=True),
format)
@ -563,13 +574,11 @@ class BooksModel(QAbstractTableModel):
def timestamp(r):
dt = self.db.data[r][tmdx]
if dt:
dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
return QDate(dt.year, dt.month, dt.day)
def pubdate(r):
dt = self.db.data[r][pddx]
if dt:
dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
return QDate(dt.year, dt.month, dt.day)
def rating(r):
@ -657,21 +666,20 @@ class BooksModel(QAbstractTableModel):
self.db.set_series_index(id, float(match.group(1)))
val = pat.sub('', val).strip()
elif val:
ni = self.db.get_next_series_num_for(val)
if ni != 1:
self.db.set_series_index(id, ni)
if tweaks['series_index_auto_increment'] == 'next':
ni = self.db.get_next_series_num_for(val)
if ni != 1:
self.db.set_series_index(id, ni)
if val:
self.db.set_series(id, val)
elif column == 'timestamp':
if val.isNull() or not val.isValid():
return False
dt = datetime(val.year(), val.month(), val.day()) + timedelta(seconds=time.timezone) - timedelta(hours=time.daylight)
self.db.set_timestamp(id, dt)
self.db.set_timestamp(id, qt_to_dt(val, as_utc=False))
elif column == 'pubdate':
if val.isNull() or not val.isValid():
return False
dt = datetime(val.year(), val.month(), val.day()) + timedelta(seconds=time.timezone) - timedelta(hours=time.daylight)
self.db.set_pubdate(id, dt)
self.db.set_pubdate(id, qt_to_dt(val, as_utc=False))
else:
self.db.set(row, column, val)
self.emit(SIGNAL("dataChanged(QModelIndex, QModelIndex)"), \
@ -888,7 +896,20 @@ class OnDeviceSearch(SearchQueryParser):
def get_matches(self, location, query):
location = location.lower().strip()
query = query.lower().strip()
matchkind = CONTAINS_MATCH
if len(query) > 1:
if query.startswith('\\'):
query = query[1:]
elif query.startswith('='):
matchkind = EQUALS_MATCH
query = query[1:]
elif query.startswith('~'):
matchkind = REGEXP_MATCH
query = query[1:]
if matchkind != REGEXP_MATCH: ### leave case in regexps because it can be significant e.g. \S \W \D
query = query.lower()
if location not in ('title', 'author', 'tag', 'all', 'format'):
return set([])
matches = set([])
@ -899,13 +920,24 @@ class OnDeviceSearch(SearchQueryParser):
'tag':lambda x: ','.join(getattr(x, 'tags')).lower(),
'format':lambda x: os.path.splitext(x.path)[1].lower()
}
for i, v in enumerate(locations):
locations[i] = q[v]
for i, r in enumerate(self.model.db):
for loc in locations:
for index, row in enumerate(self.model.db):
for locvalue in locations:
accessor = q[locvalue]
try:
if query in loc(r):
matches.add(i)
### Can't separate authors because comma is used for name sep and author sep
### Exact match might not get what you want. For that reason, turn author
### exactmatch searches into contains searches.
if locvalue == 'author' and matchkind == EQUALS_MATCH:
m = CONTAINS_MATCH
else:
m = matchkind
if locvalue == 'tag':
vals = accessor(row).split(',')
else:
vals = [accessor(row)]
if _match(query, vals, m):
matches.add(index)
break
except ValueError: # Unicode errors
import traceback
@ -1003,7 +1035,8 @@ class DeviceBooksModel(BooksModel):
def datecmp(x, y):
x = self.db[x].datetime
y = self.db[y].datetime
return cmp(datetime(*x[0:6]), datetime(*y[0:6]))
return cmp(dt_factory(x, assume_utc=True), dt_factory(y,
assume_utc=True))
def sizecmp(x, y):
x, y = int(self.db[x].size), int(self.db[y].size)
return cmp(x, y)
@ -1052,10 +1085,8 @@ class DeviceBooksModel(BooksModel):
type = ext[1:].lower()
data[_('Format')] = type
data[_('Path')] = item.path
dt = item.datetime
dt = datetime(*dt[0:6])
dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
data[_('Timestamp')] = strftime('%a %b %d %H:%M:%S %Y', dt.timetuple())
dt = dt_factory(item.datetime, assume_utc=True)
data[_('Timestamp')] = isoformat(dt, sep=' ', as_utc=False)
data[_('Tags')] = ', '.join(item.tags)
self.emit(SIGNAL('new_bookdisplay_data(PyQt_PyObject)'), data)
@ -1090,8 +1121,7 @@ class DeviceBooksModel(BooksModel):
return QVariant(BooksView.human_readable(size))
elif col == 3:
dt = self.db[self.map[row]].datetime
dt = datetime(*dt[0:6])
dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
dt = dt_factory(dt, assume_utc=True, as_utc=False)
return QVariant(strftime(BooksView.TIME_FMT, dt.timetuple()))
elif col == 4:
tags = self.db[self.map[row]].tags


@ -79,6 +79,8 @@ class _Canvas(QGraphicsRectItem):
pen = QPen()
pen.setStyle(Qt.NoPen)
self.setPen(pen)
if not hasattr(self, 'children'):
self.children = self.childItems
def layout_block(self, block, x, y):
if isinstance(block, TextBlock):

Some files were not shown because too many files have changed in this diff.