updated from main branch

This commit is contained in:
James Ralston 2010-02-21 10:02:53 -08:00
commit c91f022385
213 changed files with 36495 additions and 27669 deletions

View File

@ -4,6 +4,250 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.6.40
date: 2010-02-12
new features:
- title: "Ability to perform exact match and regular expression based searches."
type: major
tickets: [4830]
description: >
"You can now perform exact match searches by prefixing your search term with an =.
So, for example, tag:=fiction will match all tags named fiction, but not tags named
non-fiction. Similarly, you can use regular expression based searches by prefixing
the search term with a ~. Illustrative queries follow this feature list."
- title: "Autodetect if a zip/rar file is actually a comic and if so, import it as CBZ/CBR"
tickets: [4753]
- title: "Add plugin to automatically extract an ebook during import if it is in a zip/rar archive"
- title: "Linux source install: Install a calibre environment module to ease the integration of calibre into other python projects"
bug fixes:
- title: "Fix regression in 0.6.39 that broke the LRF viewer"
- title: "ZIP/EPUB files: Try to detect file name encoding instead of assuming the name is encoded in UTF-8. Also correctly
encode the extracted file name in the local filesystem encoding."
- title: "HTML Input: Handle HTML fragments more gracefully"
tickets: [4854]
- title: "Zip files: Workaround invalid zip files that contain end-of-file comments but set comment size to zero"
- title: "Restore the recipe for the Wired daily feed."
tickets: [4871]
- title: "MOBI metadata: Preserve original EXTH records when not overwrriten by calibre metadata."
- title: "Catalog generation: Improved series sorting. All books not in a series are now grouped together"
- title: "Fix occassional threading related crash when using the ChooseFormatDialog"
- title: "Catalog generation: Various fixes for handling invalid data"
new recipes:
- title: Sueddeutsche Zeitung
author: Darko Miletic
improved recipes:
- Pagina 12
- Variety
- Toronto Sun
- Telegraph UK
- Danas
- Dilbert
- version: 0.6.39
date: 2010-02-09
new features:
- title: "Add ability to control how author sort strings are automatically generated from author strings, via the config file tweaks.py"
- title: "Handle broken EPUB files from Project Gutenberg that have invalid OCF containers"
tickets: [4832]
bug fixes:
- title: "Fix regression in 0.6.38 that broke setting bookmarks in the viewer"
- title: "HTML Input: Ignore filenames that are encoded incorerctly."
new recipes:
- title: Radikal
author: Darko Miletic
- version: 0.6.38
date: 2010-02-09
new features:
- title: "Driver for the Irex DR 800"
- title: "Driver for the Booq e-book reader"
- title: "Allow automatic series increment algorithm to be tweaked by editing the config file tweaks.py"
- title: "Various improvements to the catlog generation. Larger thumbnails in EPUB output and better series sorting. Better handling of html markup in the comments."
- title: "MOBI Output: Make font used for generated masthead images user customizable."
bug fixes:
- title: "E-book viewer: Make bookmarking (and remebering last open position more robust). For linuxsource installs, you must have Qt 4.6"
tickets: [4812]
- title: "Fix conversion/import of HTML files with very long href links on windows"
tickets: [4783]
- title: "Don't read metadata from filenames for download news, even if the user has the read metadata from filename option set"
tickets: [4758]
- title: "Don't allow leading or trailing space in tags and series. Also normalize all internal spaces to a single space"
tickets: [4809]
- title: "E-book viewer: Toolbars remember their position"
tickets: [4811]
- title: "Fix year being repeated when editing date in main library screen on windows"
tickets: [4829]
- title: "New download: Fix downloading of images from URLs with an ampersand in them"
- title: "Linux source install: unbundle cssutils, it is now an external dependancy"
- title: "MOBI metadata: Fix regression that broke setting of titles in some MOBI files"
- title: "EPUB metadata: Extract the cover image from the html it is embededd in if possible, instead of rendering the html. Removes the white margins on covers and speeds up cover extraction"
- title: "Fix regression in PDB output"
- title: "News download: Remove <base> tags automatically"
- title: "Searching on device: Ignore unicode errors"
new recipes:
- title: Courier Press
author: Krittika Goyal
- title: zive.sk and iliterature.cz
author: Abelturd
- title: El Comercio, Digital Spy UK, Gizmodo, New Straits Times, Read It Later, TidBits
author: Darko Miletic
improved recipes:
- Jerusalem Post
- Clarin
- La Nacion
- Harvard Business Review
- People US Mashup
- The New Republic
- "Pagina 12"
- Discover Magazine
- Metro Montreal
- version: 0.6.37
date: 2010-02-01
new features:
- title: "E-book viewer: Add support for viewing SVG images"
type: major
- title: "Add category of Recently added books when generating catalog in e-book format"
- title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
- title: "Add support for masthead images when downloading news for the Kindle"
- title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
bug fixes:
- title: Changing the date in Dutch
tickets: [4732]
- title: "Fix regression that broke sending files to unupdated PRS 500s"
- title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
tickets: [4726]
- title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
tickets: [4692]
- title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
tickets: [4683]
- title: "Allow rating to be cleared via the Bulk metadata edit dialog"
tickets: [4693]
- title: "Add workaround for broken linux systems with multiply encoded file names"
tickets: [4721]
- title: Fix bug preventing the use of indices when setting save to disk templates
tickets: [4710]
- title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
tickets: [4707]
- title: "Catalog generation: Make sorting of numbers in title as text optional"
- title: "Fix error while sending book with non-ascii character in title/author to device on linux"
tickets: [4690]
- title: "Fix reset cover in edit meta information dialog does not actually remove cover"
tickets: [4731]
new recipes:
- title: Kamera Bild
author: Darko Miletic
- title: The Online Photographer
author: Darko Miletic
- title: The Luminous Landscape
author: Darko Miletic
- title: Slovo
author: Abelturd
- title: Various Danish newspapers
author: Darko Miletic
- title: Heraldo de Aragon
author: Lorenzo Vigentini
- title: Orange County Register
author: Lorenzo Vigentini
- title: Open Left
author: Xanthan Gum
- title: Michelle Malkin
author: Walt Anthony
- title: The Metro Montreal
author: Jerry Clapperton
- title: The Gazette
author: Jerry Clapperton
- title: Macleans Magazine
author: Nick Redding
- title: NY Times Sunday Book Review
author: Krittika Goyal
- title: Various Italian newspapers
author: Lorenzo Vigentini
improved recipes:
- The Irish Times
- Washington Post
- NIN
- The Discover Magazine
- Pagina 12
- version: 0.6.36
date: 2010-01-25

View File

@ -27,7 +27,7 @@ p.tags {
p.description {
text-align:left;
font-style:italic;
font-style:normal;
margin-top: 0em;
}
@ -55,6 +55,14 @@ p.author_index {
text-indent: 0em;
}
p.series {
text-align: left;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-indent:-2em;
}
p.read_book {
text-align:left;
margin-top:0px;
@ -71,3 +79,9 @@ p.unread_book {
text-indent:-2em;
}
hr.series_divider {
width:50%;
margin-left:1em;
margin-top:0em;
margin-bottom:0em;
}

View File

@ -0,0 +1,27 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Contains various tweaks that affect calibre behavior. Only edit this file if
you know what you are doing. If you delete this file, it will be recreated from
defaults.
'''
# The algorithm used to assign a new book in an existing series a series number.
# Possible values are:
# next - Next available number
# const - Assign the number 1 always
series_index_auto_increment = 'next'
# The algorithm used to copy author to author_sort
# Possible values are:
# invert: use "fn ln" -> "ln, fn" (the original algorithm)
# copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
author_sort_copy_method = 'invert'
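A minimal sketch of what the two tweaks above mean in practice. This is illustrative only, not calibre's own code: the helper names are hypothetical, and 'next' is read here as one past the highest index already in use.

def next_series_index(existing_indices, method='next'):
    # 'const' always assigns 1; 'next' assigns the number after the
    # highest one already used in the series (assumed interpretation).
    if method == 'const' or not existing_indices:
        return 1
    return max(existing_indices) + 1

def author_to_author_sort(author, method='invert'):
    if method == 'copy':
        # copy: author_sort is the author string, unmodified
        return author
    if method == 'comma' and ',' in author:
        # comma: behave like 'copy' when the name already contains a comma
        return author
    # invert: "fn ln" -> "ln, fn"
    parts = author.split()
    if len(parts) < 2:
        return author
    return parts[-1] + ', ' + ' '.join(parts[:-1])

print(next_series_index([1, 2]))                     # -> 3
print(author_to_author_sort('Kovid Goyal'))          # -> Goyal, Kovid
print(author_to_author_sort('Kovid Goyal', 'copy'))  # -> Kovid Goyal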

View File

@ -0,0 +1,157 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
<filter id="filter5365">
<feGaussianBlur stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
</filter>
<g id="layer1">
</g>
<g id="layer2">
<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new " points="167.5,297.005 171.429,297.005
171.429,297.005 "/>
<g id="path5265" filter="url(#filter5365)">
<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09
119.953,80.636 70.397,97.084 "/>
</g>
<g id="path5267" filter="url(#filter5365)">
<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
l2.322,16.553L118.639,100.902z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
</g>
<g id="path5269" filter="url(#filter5365)">
<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
C68.936,101.726,70.711,98.81,70.711,98.81z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
</g>
<g id="path5271" filter="url(#filter5365)">
<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
C17.974,94.288,17.113,87.874,21.479,79.607z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
</g>
<g id="path5273" filter="url(#filter5365)">
<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
L120.871,99.092z"/>
</g>
<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#B5FFA6"/>
<stop offset="1" style="stop-color:#76E976"/>
</radialGradient>
<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
l0.356,14.644L118.22,97.921z"/>
<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
C66.471,100.649,68.068,97.629,68.068,97.629z"/>
<g id="path5419" filter="url(#filter5365)">
<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233
106.738,52.778 57.183,69.227 "/>
</g>
<g id="path5421" filter="url(#filter5365)">
<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
l2.322,16.552L105.424,73.045z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
</g>
<g id="path5423" filter="url(#filter5365)">
<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
C55.721,73.869,57.497,70.953,57.497,70.953z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
</g>
<g id="path5425" filter="url(#filter5365)">
<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
C4.759,66.431,3.899,60.017,8.265,51.751z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
</g>
<g id="path5427" filter="url(#filter5365)">
<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
L107.656,71.234z"/>
</g>
<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
l49.548,18.171L54.54,67.985L6.102,50.193z"/>
<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#789DED"/>
<stop offset="1" style="stop-color:#2381E8"/>
</radialGradient>
<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
z"/>
<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
C53.256,72.793,54.854,69.772,54.854,69.772z"/>
<g id="path5447" filter="url(#filter5365)">
<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 "/>
<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305
123.882,28.85 74.326,45.299 "/>
</g>
<g id="path5449" filter="url(#filter5365)">
<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
l2.321,16.552L122.567,49.116z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
</g>
<g id="path5451" filter="url(#filter5365)">
<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
C72.863,49.94,74.641,47.024,74.641,47.024z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
</g>
<g id="path5453" filter="url(#filter5365)">
<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
C21.902,42.502,21.042,36.088,25.408,27.822z"/>
<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
</g>
<g id="path5455" filter="url(#filter5365)">
<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
</g>
<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
<stop offset="0" style="stop-color:#FD8A8A"/>
<stop offset="1" style="stop-color:#FF7878"/>
</radialGradient>
<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
<stop offset="0" style="stop-color:#FFFFFF"/>
<stop offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
</linearGradient>
<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
L122.148,46.135z"/>
<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
C70.399,48.864,71.997,45.844,71.997,45.844z"/>
</g>
</svg>

[Image previews omitted: the SVG above renders at 128 x 128 px, 11 KiB. Ten further binary image files were added and are not shown (sizes: 1.3 KiB, 764 B, 640 B, 838 B, 816 B, 1.9 KiB, 810 B, 492 B, 769 B, 783 B).]
View File

@ -0,0 +1,37 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class Kathimerini(BasicNewsRecipe):
title = 'Kathimerini'
__author__ = 'Pan'
description = 'News from Greece'
max_articles_per_feed = 100
oldest_article = 100
publisher = 'Kathimerini'
category = 'news, GR'
language = 'el'
no_stylesheets = True
remove_tags_before = dict(name='td',attrs={'class':'news'})
remove_tags_after = dict(name='td',attrs={'class':'news'})
remove_attributes = ['width', 'src','header','footer']
feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',
'http://wk.kathimerini.gr/xml_files/politics.xml'),
(u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
'http://wk.kathimerini.gr/xml_files/ell.xml'),
(u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
'http://wk.kathimerini.gr/xml_files/world.xml'),
(u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
(u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',
'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
(u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
'http://wk.kathimerini.gr/xml_files/civ.xml'),
(u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',
'http://wk.kathimerini.gr/xml_files/st.xml')]
def print_version(self, url):
return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/')

View File

@ -0,0 +1,45 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class ZiveRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'Abelturd'
language = 'sk'
version = 1
title = u'ZIVE.sk'
publisher = u''
category = u'News, Newspaper'
description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk o po\u010d\xedta\u010doch, IT a internete.'
encoding = 'UTF-8'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
feeds = []
feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'))
preprocess_regexps = [
(re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
remove_tags = []
keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),]
extra_css = '''
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
'''
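Each entry in preprocess_regexps above is a (compiled pattern, replacement function) pair that calibre applies to an article's raw HTML before parsing. A rough standalone sketch of the effect, using a made-up HTML fragment:

import re

# Hypothetical article fragment; the trailing "Pokracovanie" teaser block
# is the kind of markup the recipe's pattern strips.
raw = '<h1>Title</h1><p><p><strong>Pokracovanie</strong></p>'
pat = re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL | re.IGNORECASE)
print(pat.sub(lambda match: '', raw))  # -> <h1>Title</h1>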

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
clarin.com
'''
@ -21,7 +21,8 @@ class Clarin(BasicNewsRecipe):
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
encoding = 'cp1252'
language = 'es'
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
masthead_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} '
conversion_options = {
'comment' : description

View File

@ -0,0 +1,26 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class CourierPress(BasicNewsRecipe):
title = u'Courier Press'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
remove_stylesheets = True
remove_tags = [
dict(name='iframe'),
]
feeds = [
('Courier Press',
'http://www.courierpress.com/rss/headlines/news/'),
]
def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'id':'article_body'})
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
return soup

View File

@ -1,64 +1,63 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Danas(BasicNewsRecipe):
title = 'Danas'
__author__ = 'Darko Miletic'
description = 'Vesti'
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
publisher = 'Danas d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = False
use_embedded_content = False
encoding = 'utf-8'
masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
language = 'sr'
lang = 'sr-Latn-RS'
direction = 'ltr'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'pretty_print' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
,dict(name='div', attrs={'id':'comments'})
,dict(name=['object','link'])
,dict(name=['object','link','iframe'])
]
feeds = [
(u'Vesti' , u'http://www.danas.rs/rss/rss.asp' )
,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4')
(u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
,(u'Hronika' , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
,(u'Drustvo' , u'http://www.danas.rs/rss/rss.asp?column_id=24')
,(u'Dijalog' , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' )
,(u'Svet' , u'http://www.danas.rs/rss/rss.asp?column_id=25')
,(u'Srbija' , u'http://www.danas.rs/rss/rss.asp?column_id=28')
,(u'Kultura' , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
,(u'Sport' , u'http://www.danas.rs/rss/rss.asp?column_id=13')
,(u'Scena' , u'http://www.danas.rs/rss/rss.asp?column_id=42')
,(u'Feljton' , u'http://www.danas.rs/rss/rss.asp?column_id=19')
,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
]
def preprocess_html(self, soup):
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
return url + '&action=print'

View File

@ -0,0 +1,43 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.digitalspy.co.uk
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DigitalSpyUK(BasicNewsRecipe):
title = 'Digital Spy - UK Edition'
__author__ = 'Darko Miletic'
description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
publisher = 'Digital Spy Limited.'
category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'en_GB'
remove_empty_feeds = True
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['link'])]
remove_attributes = ['height','width']
keep_only_tags = [dict(name='div',attrs={'id':'article'})]
feeds = [
(u'News' , u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml' )
,(u'Big Brother' , u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml' )
,(u'Entertainment' , u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml')
,(u'General' , u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml' )
,(u'Media' , u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml' )
]

View File

@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.dilbert.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe):
feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
preprocess_regexps = [
(re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
lambda match: 'strip.zoom.gif')
]
def get_article_url(self, article):
return article.get('feedburner_origlink', None)

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
doscovermagazine.com
discovermagazine.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
@ -19,21 +19,15 @@ class DiscoverMagazine(BasicNewsRecipe):
oldest_article = 33
max_articles_per_feed = 20
no_stylesheets = True
remove_javascript = True
remove_javascript = True
use_embedded_content = False
encoding = 'utf-8'
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
remove_tags_before = dict(id='articlePage')
remove_tags = [dict(name='div', attrs={'id':['searchModule', 'mainMenu', 'tool-box']}),
dict(name='img', attrs={'src':'http://discovermagazine.com/onebyone.gif'})]
keep_only_tags = [dict(name='div', attrs={'id':'articlePage'})]
remove_tags = [dict(attrs={'id':['buttons', 'tool-box', 'teaser', 'already-subscriber', 'teaser-suite', 'related-articles', 'relatedItem', 'box-popular', 'box-blogs', 'box-news', 'footer']}),
dict(attrs={'class':'popularNewsBox'}),
dict(name=['img', 'style', 'head'])]
remove_tags_after = dict(id='articlePage')
remove_tags_after = [dict(name='div', attrs={'class':'articlebody'})]
feeds = [
(u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),

View File

@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v2'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
eksiazki.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class eksiazki(BasicNewsRecipe):
title = u'eksiazki.org'
description = u'Twoje centrum wiedzy o epapierze i ebookach'
language = 'pl'
__author__ = u'Tomasz D\u0142ugosz'
no_stylesheets = True
remove_javascript = True
feeds = [(u'wpisy', u'http://www.eksiazki.org/feed/')]
keep_only_tags = [dict(name='div', attrs={'id':'content-body'})]
remove_tags = [
dict(name='span', attrs={'class':'nr_comm'}),
dict(name='div', attrs={'id':'tabsContainer'}),
dict(name='div', attrs={'class':'next_previous_links'})]

View File

@ -0,0 +1,38 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
elcomercio.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElComercio(BasicNewsRecipe):
title = 'El Comercio '
__author__ = 'Darko Miletic'
description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
publisher = 'GRUPO EL COMERCIO C.A.'
category = 'news, Ecuador, politics'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'es'
masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,40 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gizmodo.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gizmodo(BasicNewsRecipe):
title = 'Gizmodo'
__author__ = 'Darko Miletic'
description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
publisher = 'gizmodo.com'
category = 'news, IT, Internet, gadgets'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
remove_tags = [dict(name='div',attrs={'class':'feedflare'})]
remove_tags_after = dict(name='div',attrs={'class':'feedflare'})
feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe):
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'mailingListTout', 'partnerCenter', 'pageFooter']),
'mailingListTout', 'partnerCenter', 'pageFooter',
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
dict(name='iframe')]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }

View File

@ -0,0 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class SmeRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'Abelturd'
language = 'cs'
version = 1
title = u'iLiteratura.cz'
publisher = u''
category = u'News, Newspaper'
description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA'
cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
feeds = []
feeds.append((u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp'))
keep_only_tags = []
remove_tags = [dict(name='table'),dict(name='h3')]
preprocess_regexps = [
(re.compile(r'<h3>Souvisej.*</body>', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
def print_version(self, url):
m = re.search('(?<=ID=)[0-9]*', url)
return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + str(m.group(0)) + '&c=tisk'
extra_css = '''
h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
'''
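The print_version hook above extracts the numeric article id with a lookbehind regex and rebuilds a printer-friendly URL. A standalone check of that logic (the URL is a made-up example):

import re

url = 'http://www.iliteratura.cz/clanek.asp?polozkaID=12345'  # hypothetical article URL
m = re.search('(?<=ID=)[0-9]*', url)
print('http://www.iliteratura.cz/clanek.asp?polozkaID=' + m.group(0) + '&c=tisk')
# -> http://www.iliteratura.cz/clanek.asp?polozkaID=12345&c=tisk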

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com> and Edwin van Maastrigt <evanmaastrigt at gmail.com>'
__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
'''
http://www.ilsole24ore.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ilsole(BasicNewsRecipe):
author = 'Lorenzo Vigentini & Edwin van Maastrigt'
description = 'Financial news daily paper'
cover_url = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
title = u'il Sole 24 Ore '
publisher = 'italiaNews'
category = 'News, finance, economy, politics'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 2
max_articles_per_feed = 50
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
def get_article_url(self, article):
return article.get('id', article.get('guid', None))
def print_version(self, url):
link, sep, params = url.rpartition('?')
return link.replace('.shtml', '_PRN.shtml')
keep_only_tags = [
dict(name='div', attrs={'class':'txt'})
]
remove_tags = [dict(name='br')]
feeds = [
(u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
(u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
(u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
(u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
(u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
(u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
(u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
(u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
(u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
(u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
]
extra_css = '''
html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
.linkHighlight {color:#0292c6;}
.txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
.txt p {line-height:18px;}
.txt span {line-height:22px;}
.title h3 {color:#7b7b7b;}
.title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
'''

View File

@ -10,13 +10,8 @@ class JerusalemPost(BasicNewsRecipe):
__author__ = 'Kovid Goyal'
max_articles_per_feed = 10
no_stylesheets = True
remove_tags_before = {'class':'byline'}
remove_tags = [
{'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox',
'slideshow']},
dict(id=['artFontButtons', 'artRelatedBlock']),
]
remove_tags_after = {'id':'artTxtBlock'}
remove_tags_before = {'class':'jp-grid-content'}
remove_tags_after = {'id':'body_val'}
feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
@ -25,7 +20,9 @@ class JerusalemPost(BasicNewsRecipe):
('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
]
def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div'
def preprocess_html(self, soup):
for x in soup.findAll(name=['form', 'input']):
x.name = 'div'
for x in soup.findAll('body', style=True):
del x['style']
return soup

View File

@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.kamerabild.se
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Kamerabild(BasicNewsRecipe):
title = 'Kamera & Bild'
__author__ = 'Darko Miletic'
description = 'Photo News from Sweden'
publisher = 'kamerabild.se'  # assumed; the original said 'politiken.dk', apparently copied from another recipe
category = 'news, photograph, Sweden'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
encoding = 'utf8'
language = 'sv'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } .title{font-weight: bold} .pricerunnerAdContainer{border-bottom: 1px solid; border-top: 1px solid; margin-top: 0.5em; margin-bottom: 0.5em} .elementTeaserKicker{font-weight: bold; color: #AE0A10} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [(u'Articles', u'http://www.kamerabild.se/cmlink/Nyheter-fran-KAMERA-BILD-1.43315.xml')]
keep_only_tags = [dict(name='div',attrs={'class':'container'})]
remove_tags_after = dict(name='div',attrs={'class':'editor'})
remove_tags = [
dict(name=['object','link','iframe'])
,dict(name='div',attrs={'class':['pricerunner_head','sideBar','img']})
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lanacion.com.ar
'''
@ -12,28 +10,34 @@ class Lanacion(BasicNewsRecipe):
title = 'La Nacion'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo'
publisher = 'La Nacion'
publisher = 'La Nacion S.A.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
language = 'es'
encoding = 'cp1252'
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
extra_css = ' h1{font-family: Georgia,serif} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em} .notaEpigrafe{font-size: x-small} '
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
,dict(name='ul' , attrs={'class':'cajaHerramientas cajaTop noprint'})
,dict(name='div' , attrs={'class':'cajaHerramientas noprint' })
,dict(attrs={'class':['titulosMultimedia','derecha','techo color']})
,dict(name=['iframe','embed','object'])
]
remove_attributes = ['height','width']
feeds = [
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
@ -51,10 +55,4 @@ class Lanacion(BasicNewsRecipe):
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = 'es'
return self.adeify_images(soup)

View File

@ -0,0 +1,89 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '10, January 2010'
__description__ = 'Monthly Italian edition of Scientific American'
'''
http://lescienze.espresso.repubblica.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class leScienze(BasicNewsRecipe):
author = 'Lorenzo Vigentini'
description = 'Monthly Italian edition of Scientific American'
cover_url = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif'
title = 'le Scienze'
publisher = "Gruppo editoriale L'Espresso"
category = 'Science, general interest'
language = 'it'
encoding = 'cp1252'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 31
max_articles_per_feed = 20
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
keep_only_tags = [
dict(name='div', attrs={'class':'bigbox'})
]
remove_tags = [
dict(name='span',attrs={'class':'linkindice'}),
dict(name='div',attrs={'class':'box-commenti'}),
dict(name='div',attrs={'id':['rssdiv','blocco']})
]
remove_tags_after = [dict(name='div',attrs={'class':'box-commenti'})]
feeds = [
(u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'),
(u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'),
(u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'),
(u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'),
(u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'),
(u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'),
(u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'),
(u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'),
(u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'),
(u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'),
(u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'),
(u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'),
(u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'),
(u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'),
(u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'),
(u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'),
(u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'),
(u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'),
(u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'),
(u'Scienze dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'),
(u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'),
(u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'),
(u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'),
(u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza')
]
extra_css = '''
h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
.occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;}
.titolo {font-weight:bold;}
.label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;}
.firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
.testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
'''

View File

@ -4,21 +4,26 @@ class Metro_Montreal(BasicNewsRecipe):
title = u'M\xe9tro Montr\xe9al'
__author__ = 'Jerry Clapperton'
description = u'Le quotidien le plus branch\xe9 sur le monde'
language = 'fr'
description = 'Le quotidien le plus branch\xe9 sur le monde'
language = 'fr'
oldest_article = 7
oldest_article = 7
max_articles_per_feed = 20
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
encoding = 'utf-8'
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
remove_tags = [dict(attrs={'id':'buttons'})]
remove_tags = [dict(attrs={'id':'buttons'}), dict(name=['img', 'style'])]
feeds = [(u"L'info", u'http://journalmetro.com/linfo/rss'), (u'Monde', u'http://journalmetro.com/monde/rss'), (u'Culture', u'http://journalmetro.com/culture/rss'), (u'Sports', u'http://journalmetro.com/sports/rss'), (u'Paroles', u'http://journalmetro.com/paroles/rss')]
feeds = [
(u"L'info", u'http://journalmetro.com/linfo/rss'),
(u'Monde', u'http://journalmetro.com/monde/rss'),
(u'Culture', u'http://journalmetro.com/culture/rss'),
(u'Sports', u'http://journalmetro.com/sports/rss'),
(u'Paroles', u'http://journalmetro.com/paroles/rss')
]
def print_version(self, url):
return url.replace('article', 'ArticlePrint') + '?language=fr'

View File

@ -0,0 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nst.com.my
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Newstraitstimes(BasicNewsRecipe):
title = 'New Straits Times from Malaysia'
__author__ = 'Darko Miletic'
description = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
publisher = 'nst.com.my'
category = 'news, politics, Malaysia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'en'
masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['link','table'])]
keep_only_tags = [dict(name='div',attrs={'id':'haidah'})]
feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]

View File

@ -74,7 +74,6 @@ class Nin(BasicNewsRecipe):
feedpage = self.index_to_soup(feedlink)
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
inarts = []
count2 = 0
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
alink = art.parent
url = self.PREFIX + alink['href']

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re
import re, time
from calibre import entity_to_unicode
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment

View File

@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
dict(name=['script', 'noscript', 'style'])]
encoding = decode
no_stylesheets = True
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
extra_css = 'h1 {font-family:sans-serif; font-size:2em; font-weight:bold;}\n.byline {font:monospace;}\n.bold {font-weight:bold;}'
def get_browser(self):
br = BasicNewsRecipe.get_browser()

View File

@ -0,0 +1,56 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class NewYorkTimesBookReview(BasicNewsRecipe):
title = u'New York Times Book Review'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 8 #days
max_articles_per_feed = 1000
recursions = 2
#encoding = 'latin1'
remove_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'})
remove_tags_after = dict(name='div', attrs={'id':'authorId'})
remove_tags = [
dict(name='iframe'),
dict(name=['div', 'a'], attrs={'class':['enlargeThis', 'jumpLink']}),
dict(name='div', attrs={'id':['sidebarArticles', 'toolsRight']}),
#dict(name='ul', attrs={'class':'article-tools'}),
#dict(name='ul', attrs={'class':'articleTools'}),
]
match_regexps = [
r'http://www.nytimes.com/.+pagewanted=[2-9]+'
]
feeds = [
('New York Times Sunday Book Review',
'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
]
def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'id':'article'})
#td = heading.findParent(name='td')
#td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
#for x in soup.findAll(name='p', text=lambda x:x and '--&gt;' in x):
#p = x.findParent('p')
#if p is not None:
#p.extract()
return soup
def postprocess_html(self, soup, first):
for div in soup.findAll(id='pageLinks'):
div.extract()
if not first:
h1 = soup.find('h1')
if h1 is not None:
h1.extract()
t = soup.find(attrs={'class':'timestamp'})
if t is not None:
t.extract()
return soup

View File

@ -1,13 +1,12 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
pagina12.com.ar
'''
import time
from calibre import strftime
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Pagina12(BasicNewsRecipe):
title = 'Pagina - 12'
@ -16,13 +15,14 @@ class Pagina12(BasicNewsRecipe):
publisher = 'La Pagina S.A.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
extra_css = ' body{font-family: sans-serif} '
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
conversion_options = {
'comment' : description
@ -45,14 +45,24 @@ class Pagina12(BasicNewsRecipe):
,(u'NO' , u'http://www.pagina12.com.ar/diario/rss/no.xml' )
,(u'Las/12' , u'http://www.pagina12.com.ar/diario/rss/las12.xml' )
,(u'Soy' , u'http://www.pagina12.com.ar/diario/rss/soy.xml' )
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
,(u'Futuro' , u'http://www.pagina12.com.ar/diario/rss/futuro.xml' )
,(u'M2' , u'http://www.pagina12.com.ar/diario/rss/m2.xml' )
,(u'Rosario/12' , u'http://www.pagina12.com.ar/diario/rss/rosario.xml' )
]
def print_version(self, url):
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
def get_cover_url(self):
imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
weekday = time.localtime().tm_wday
return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
for image in soup.findAll('img',alt=True):
if image['alt'].startswith('Tapa de la fecha'):
return image['src']
return None
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe):
dict(name='div', attrs={'class':'sharelinkcont'}),
dict(name='div', attrs={'class':'categories'}),
dict(name='ul', attrs={'class':'categories'}),
dict(name='div', attrs={'class':'related_content'}),
dict(name='div', attrs={'id':'promo'}),
dict(name='div', attrs={'class':'linksWrapper'}),
dict(name='p', attrs={'class':'tag tvnews'}),

View File

@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
radikal.com.tr
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Radikal_tr(BasicNewsRecipe):
title = 'Radikal - Turkey'
__author__ = 'Darko Miletic'
description = 'News from Turkey'
publisher = 'radikal'
category = 'news, politics, Turkey'
oldest_article = 2
max_articles_per_feed = 150
no_stylesheets = True
encoding = 'cp1254'
use_embedded_content = False
masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg'
language = 'tr'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['embed','iframe','object','link','base'])]
remove_tags_before = dict(name='h1')
remove_tags_after = dict(attrs={'id':'haberDetayYazi'})
feeds = [(u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml')]
def print_version(self, url):
articleid = url.rpartition('ArticleID=')[2]
return 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID=' + articleid
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
readitlaterlist.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Readitlater(BasicNewsRecipe):
title = 'Read It Later'
__author__ = 'Darko Miletic'
description = '''Personalized news feeds. Go to readitlaterlist.com to
set up your news. Fill in your account
username, and optionally your password.'''
publisher = 'readitlater.com'
category = 'news, custom'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
INDEX = u'http://readitlaterlist.com'
LOGIN = INDEX + u'/l'
feeds = [(u'Unread articles' , INDEX + u'/unread')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
br.open(self.LOGIN)
br.select_form(nr=0)
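# The login form stores the username in a field named 'feed_id'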
br['feed_id'] = self.username
if self.password is not None:
br['password'] = self.password
br.submit()
return br
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul',attrs={'id':'list'})
for item in ritem.findAll('li'):
description = ''
atag = item.find('a',attrs={'class':'text'})
if atag and atag.has_key('href'):
url = self.INDEX + atag['href']
title = self.tag_to_string(item.div)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds

View File

@ -0,0 +1,107 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.sueddeutsche.de/sz/
'''
import urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class SueddeutscheZeitung(BasicNewsRecipe):
title = 'Sueddeutsche Zeitung'
__author__ = 'Darko Miletic'
description = 'News from Germany. Access to paid content.'
publisher = 'Sueddeutsche Zeitung'
category = 'news, politics, Germany'
no_stylesheets = True
oldest_article = 2
encoding = 'cp1252'
needs_subscription = True
remove_empty_feeds = True
PREFIX = 'http://www.sueddeutsche.de'
INDEX = PREFIX + strftime('/sz/%Y-%m-%d/')
LOGIN = PREFIX + '/app/lbox/index.html'
use_embedded_content = False
masthead_url = 'http://pix.sueddeutsche.de/img/g_.gif'
language = 'de'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
remove_attributes = ['height','width']
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None:
data = urllib.urlencode({ 'login_name':self.username
,'login_passwort':self.password
,'lboxaction':'doLogin'
,'passtxt':'Passwort'
,'referer':self.INDEX
,'x':'22'
,'y':'7'
})
br.open(self.LOGIN,data)
return br
remove_tags =[
dict(attrs={'class':'hidePrint'})
,dict(name=['link','object','embed','base','iframe'])
]
remove_tags_before = dict(name='h2')
remove_tags_after = dict(attrs={'class':'author'})
feeds = [
(u'Politik' , INDEX + 'politik/' )
,(u'Seite drei' , INDEX + 'seitedrei/' )
,(u'Meinungsseite', INDEX + 'meinungsseite/')
,(u'Wissen' , INDEX + 'wissen/' )
,(u'Panorama' , INDEX + 'panorama/' )
,(u'Feuilleton' , INDEX + 'feuilleton/' )
,(u'Medien' , INDEX + 'medien/' )
,(u'Wirtschaft' , INDEX + 'wirtschaft/' )
,(u'Sport' , INDEX + 'sport/' )
,(u'Bayern' , INDEX + 'bayern/' )
,(u'Muenchen' , INDEX + 'muenchen/' )
,(u'jetzt.de' , INDEX + 'jetzt.de/' )
]
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
tbl = soup.find(attrs={'class':'szprintd'})
for item in tbl.findAll(name='td',attrs={'class':'topthema'}):
atag = item.find(attrs={'class':'Titel'}).a
ptag = item.find('p')
stag = ptag.find('script')
if stag:
stag.extract()
url = self.PREFIX + atag['href']
title = self.tag_to_string(atag)
description = self.tag_to_string(ptag)
articles.append({
'title' :title
,'date' :strftime(self.timefmt)
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return url + 'print.html'

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TelegraphUK(BasicNewsRecipe):
title = u'Telegraph.co.uk'
__author__ = 'Darko Miletic'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'News from United Kingdom'
oldest_article = 7
max_articles_per_feed = 100
@ -18,23 +18,26 @@ class TelegraphUK(BasicNewsRecipe):
use_embedded_content = False
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444}
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
a{color:#234B7B; }
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
'''
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444;}
.story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
a{color:#234B7B; }
.imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
'''
keep_only_tags = [
keep_only_tags = [
dict(name='div', attrs={'class':'storyHead'})
,dict(name='div', attrs={'class':'story' })
#,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
]
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder"]})]
]
remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
#,dict(name='div', attrs={'class':['toolshideoneQuarter']})
,dict(name='span', attrs={'class':['num','placeComment']})
]
feeds = [
feeds = [
(u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
@ -45,7 +48,7 @@ class TelegraphUK(BasicNewsRecipe):
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
]
]
def get_article_url(self, article):
@ -57,3 +60,15 @@ class TelegraphUK(BasicNewsRecipe):
return url
def postprocess_html(self,soup,first):
for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
for pTag in bylineTag.findAll(name='p'):
if getattr(pTag.contents[0],"Comments",True):
pTag.extract()
return soup

View File

@ -1,22 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class The_Gazette(BasicNewsRecipe):
cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
title = u'The Gazette'
__author__ = 'Jerry Clapperton'
description = 'Montreal news in English'
language = 'en_CA'
oldest_article = 7
max_articles_per_feed = 20
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]

View File

@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_tags = [
dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
('Economy', 'http://www.tnr.com/rss/articles/Economy'),
('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
('World', 'http://www.tnr.com/rss/articles/World'),
('Film', 'http://www.tnr.com/rss/articles/Film'),
('Books', 'http://www.tnr.com/rss/articles/books'),
('The Book', 'http://www.tnr.com/rss/book'),
('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
def print_version(self, url):
return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')

View File

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
luminous-landscape.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class theluminouslandscape(BasicNewsRecipe):
title = 'The Luminous Landscape'
__author__ = 'Darko Miletic'
description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
publisher = 'The Luminous Landscape '
category = 'news, blog, photograph, international'
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = True
encoding = 'cp1252'
language = 'en'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [(u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')]
remove_tags = [dict(name=['object','link','iframe'])]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,41 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
theonlinephotographer.typepad.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class theonlinephotographer(BasicNewsRecipe):
title = 'The Online Photographer'
__author__ = 'Darko Miletic'
description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
publisher = 'The Online Photographer'
category = 'news, blog, photograph, international'
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
encoding = 'utf8'
language = 'en'
extra_css = ' body{font-family: Georgia,"Times New Roman",serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
feeds = [(u'Articles', u'http://feeds.feedburner.com/typepad/ZSjz')]
remove_tags_before = dict(name='h3',attrs={'class':'entry-header'})
remove_tags_after = dict(name='div',attrs={'class':'entry-footer'})
remove_tags = [dict(name=['object','link','iframe'])]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,53 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
db.tidbits.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TidBITS(BasicNewsRecipe):
title = 'TidBITS: Mac News for the Rest of Us'
__author__ = 'Darko Miletic'
description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
publisher = 'TidBITS Publishing Inc.'
category = 'news, Apple, Macintosh, IT, Internet'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = True
language = 'en'
remove_empty_feeds = True
masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
remove_tags = [dict(name='small')]
remove_tags_after = dict(name='small')
feeds = [
(u'Business Apps' , u'http://db.tidbits.com/feeds/business.rss' )
,(u'Entertainment' , u'http://db.tidbits.com/feeds/entertainment.rss')
,(u'External Links' , u'http://db.tidbits.com/feeds/links.rss' )
,(u'Home Mac' , u'http://db.tidbits.com/feeds/home.rss' )
,(u'Inside TidBITS' , u'http://db.tidbits.com/feeds/inside.rss' )
,(u'iPod & iPhone' , u'http://db.tidbits.com/feeds/ipod-iphone.rss' )
,(u'Just for Fun' , u'http://db.tidbits.com/feeds/fun.rss' )
,(u'Macs & Mac OS X' , u'http://db.tidbits.com/feeds/macs.rss' )
,(u'Media Creation' , u'http://db.tidbits.com/feeds/creative.rss' )
,(u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss' )
,(u'Opinion & Editorial' , u'http://db.tidbits.com/feeds/opinion.rss' )
,(u'Support & Problem Solving' , u'http://db.tidbits.com/feeds/support.rss' )
,(u'Safe Computing' , u'http://db.tidbits.com/feeds/security.rss' )
,(u'Tech News' , u'http://db.tidbits.com/feeds/tech.rss' )
,(u'Software Watchlist' , u'http://db.tidbits.com/feeds/watchlist.rss' )
]

View File

@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TorontoSun(BasicNewsRecipe):
title = 'Toronto SUN'
__author__ = 'Darko Miletic'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'News from Canada'
publisher = 'Toronto Sun'
category = 'news, politics, Canada'
@ -21,25 +21,50 @@ class TorontoSun(BasicNewsRecipe):
encoding = 'cp1252'
language = 'en_CA'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags =[
dict(name='div', attrs={'class':'articleHead'})
,dict(name='div', attrs={'id':'channelContent'})
]
remove_tags = [
dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']})
,dict(name=['link','iframe','object'])
,dict(name='a',attrs={'rel':'swap'})
,dict(name='ul',attrs={'class':'tabs dl contentSwap'})
]
keep_only_tags = [
dict(name='div', attrs={'class':['articleHead','leftBox']})
,dict(name='div', attrs={'id':'channelContent'})
,dict(name='div', attrs={'id':'rotateBox'})
,dict(name='img')
]
remove_tags = [
dict(name='div',attrs={'class':['bottomBox clear','bottomBox','breadCrumb','articleControls thin','articleControls thin short','extraVideoList']})
,dict(name='h2',attrs={'class':'microhead'})
,dict(name='div',attrs={'id':'commentsBottom'})
,dict(name=['link','iframe','object'])
,dict(name='a',attrs={'rel':'swap'})
,dict(name='a',attrs={'href':'/news/haiti/'})
,dict(name='ul',attrs={'class':['tabs dl contentSwap','micrositeNav clearIt hList','galleryNav rotateNav']})
]
remove_tags_after = [
dict(name='div',attrs={'class':'bottomBox clear'})
,dict(name='div',attrs={'class':'rotateBox'})
,dict(name='div',attrs={'id':'contentSwap'})
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
.bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;}
.subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;}
.byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;}
.updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
.galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
.galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
'''
remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'})
feeds = [
(u'News' , u'http://www.torontosun.com/news/rss.xml' )
@ -48,3 +73,19 @@ class TorontoSun(BasicNewsRecipe):
,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' )
,(u'Money' , u'http://www.torontosun.com/money/rss.xml' )
]
def preprocess_html(self, soup):
# The src attribute points at an image proxy; the real image URL and its
# WIDTHxHEIGHT dimensions are carried in the query string, so recover both.
for img in soup.findAll('img', src=True):
url = img.get('src').split('?')[-1].partition('=')[-1]
if url:
img['src'] = url.split('&')[0].partition('=')[0]
img['width'] = url.split('&')[-1].partition('=')[-1].split('x')[0]
img['height'] = url.split('&')[-1].partition('=')[-1].split('x')[1]
return soup

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '30, January 2010'
__description__ = 'Sport daily news from Italy'
'''www.tuttosport.com'''
from calibre.web.feeds.news import BasicNewsRecipe
class tuttosport(BasicNewsRecipe):
author = 'Lorenzo Vigentini'
description = 'Sport daily news from Italy'
cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png'
title = 'Tuttosport'
publisher = 'Nuova Editoriale Sportiva S.r.l'
category = 'Sport News'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 2
max_articles_per_feed = 20
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
def print_version(self,url):
segments = url.split('/')
printURL = '/'.join(segments[0:10]) + '?print'
return printURL
keep_only_tags = [
dict(name='h2', attrs={'class':'tit_Article'}),
dict(name='div', attrs={'class':['box_Img img_L ','txt_ArticleAbstract','txt_Article txtBox_cms']})
]
feeds = [
(u'Primo piano',u'http://www.tuttosport.com/rss/primo_piano.xml'),
(u'Cronaca',u'http://www.tuttosport.com/rss/Cronaca-205.xml'),
(u'Lettere al direttore',u'http://blog.tuttosport.com/direttore/feed'),
(u'Calcio',u'http://www.tuttosport.com/rss/Calcio-3.xml'),
(u'Speciale Derby',u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'),
(u'Formula 1',u'http://www.tuttosport.com/rss/Formula-1-7.xml'),
(u'Moto',u'http://www.tuttosport.com/rss/Moto-8.xml'),
(u'Basket',u'http://www.tuttosport.com/rss/Basket-9.xml'),
(u'Altri Sport',u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'),
(u'Tuttosport League',u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'),
(u'Scommesse',u'http://www.tuttosport.com/rss/Scommesse-286.xml')
]
extra_css = '''
body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;}
h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;}
.txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;}
.txt_Article {clear: both; margin: 8px 8px 12px;}
.txt_Author {float: right;}
.txt_ArticleAuthor {clear: both; margin: 8px;}
'''

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.variety.com
'''
@ -20,6 +18,8 @@ class Variety(BasicNewsRecipe):
publisher = 'Red Business Information'
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
language = 'en'
masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
conversion_options = {
'comments' : description
@ -41,6 +41,6 @@ class Variety(BasicNewsRecipe):
catid = catidr.partition('&')[0]
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
def get_article_url(self, article):
return article.get('feedburner_origlink', None)
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,95 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.nbcolympics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Olympics_2010(BasicNewsRecipe):
title = u'NBC Olympics 2010'
__author__ = 'Starson17'
description = 'Olympics 2010'
cover_url = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg'
publisher = 'Olympics 2010'
tags = 'Olympics news'
language = 'en'
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
# recursions = 3
oldest_article = 7
max_articles_per_feed = 10
keep_only_tags = [dict(name='div', attrs={'class':['Article ','ArticleGallery']}),
]
remove_tags = [dict(name='div', attrs={'id':['RelatedTagsBox','ShareBox']}),
dict(name='div', attrs={'class':['DateUtilities','PhotoGallery BoxRight','Frame','ToolBox']}),
]
# RSS feeds are at: http://www.nbcolympics.com/rss/index.html
feeds = [
('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'),
('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'),
('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'),
# ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'),
# ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'),
# ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'),
('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'),
# ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'),
# ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'),
# ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
# ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
# ('NBCOlympics.com - Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'),
# ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'),
('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'),
# ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'),
# ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'),
('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'),
# ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'),
# ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'),
('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'),
# ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'),
# ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'),
('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'),
# ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'),
# ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'),
('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'),
# ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'),
# ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'),
('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'),
# ('NBCOlympics.com - Hockey - Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'),
# ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'),
('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'),
# ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'),
# ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'),
('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'),
# ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'),
# ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'),
('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'),
# ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'),
# ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'),
('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'),
# ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'),
# ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'),
('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'),
# ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'),
# ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'),
('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'),
# ('NBCOlympics.com - Snowboarding - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'),
# ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'),
('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'),
# ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
# ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -20,6 +20,7 @@ class Wired(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
masthead_url = 'http://www.wired.com/images/home/wired_logo.gif'
language = 'en'
extra_css = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
index = 'http://www.wired.com/magazine/'
@ -38,14 +39,34 @@ class Wired(BasicNewsRecipe):
dict(name=['object','embed','iframe','link'])
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
]
remove_attributes = ['height','width']
#feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
def parse_index(self):
totalfeeds = []
soup = self.index_to_soup(self.index)
soup = self.index_to_soup(self.index)
majorf = soup.find('div',attrs={'class':'index'})
if majorf:
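# Build a synthetic 'Cover' feed from the magazine index page: the
# spread-header article first, then every linked list item.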
pfarticles = []
firsta = majorf.find(attrs={'class':'spread-header'})
if firsta:
pfarticles.append({
'title' :self.tag_to_string(firsta.a)
,'date' :strftime(self.timefmt)
,'url' :'http://www.wired.com' + firsta.a['href']
,'description':''
})
for itt in majorf.findAll('li'):
itema = itt.find('a',href=True)
if itema:
pfarticles.append({
'title' :self.tag_to_string(itema)
,'date' :strftime(self.timefmt)
,'url' :'http://www.wired.com' + itema['href']
,'description':''
})
totalfeeds.append(('Cover', pfarticles))
features = soup.find('div',attrs={'id':'my-glider'})
if features:
farticles = []

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class Wired_Daily(BasicNewsRecipe):
title = 'Wired Daily Edition'
__author__ = 'Kovid Goyal'
description = 'Technology news'
timefmt = ' [%Y%b%d %H%M]'
language = 'en'
no_stylesheets = True
remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar',
'footer', 'advertisement', 'blog_subscription_unit',
'brightcove_component']),
{'class':'entryActions'},
dict(name=['noscript', 'script'])]
feeds = [
('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'),
('Software', 'http://feeds.wired.com/wired/software'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
('Cars', 'http://feeds.wired.com/wired/cars'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
('Gaming', 'http://feeds.wired.com/wired/gaming'),
('Science', 'http://feeds.wired.com/wired/science'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'),
('Politics', 'http://feeds.wired.com/wired/politics'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
('Commentary', 'http://feeds.wired.com/wired/commentary'),
]
def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')

View File

@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
# first, check if there is an h3 tag which provides a section name
stag = divtag.find('h3')
if stag:
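# use .get() here: not every parent tag carries a class attribute, and
# direct indexing raised a KeyError on those that do not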
if stag.parent['class'] == 'dynamic':
if stag.parent.get('class', '') == 'dynamic':
# a carousel of articles is too complex to extract a section name
# for each article, so we'll just call the section "Carousel"
section_name = 'Carousel'

resources/tanea.recipe Normal file
View File

@ -0,0 +1,30 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class TaNea(BasicNewsRecipe):
title = u'Ta Nea'
__author__ = 'Pan'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
remove_tags_before = dict(name='div',attrs={'id':'print-body'})
remove_tags_after = dict(name='div',attrs={'id':'text'})
feeds = [
(u'Ελλάδα',
u'http://www.tanea.gr/default.asp?pid=66&la=1'),
(u'Κόσμος',
u'http://www.tanea.gr/default.asp?pid=67&la=1'),
(u'Οικονομία',
u'http://www.tanea.gr/default.asp?pid=68&la=1'),
(u'Πολιτισμός',
u'http://www.tanea.gr/default.asp?pid=69&la=1'),
(u'Γνώμες',
u'http://www.tanea.gr/default.asp?pid=79&la=1'),
(u'Ριπές',
u'http://www.tanea.gr/default.asp?pid=80&la=1'),
(u'Αιχμές',
u'http://www.tanea.gr/default.asp?pid=81&la=1')]
def print_version(self, url):
return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')

View File

@ -20,37 +20,8 @@ function selector(elem) {
return sel;
}
function find_closest_enclosing_block(top) {
var START = top-1000;
var STOP = top;
var matches = [];
var elem, temp;
var width = 1000;
for (y = START; y < STOP; y += 20) {
for ( x = 0; x < width; x += 20) {
elem = document.elementFromPoint(x, y);
try {
elem = $(elem);
temp = elem.offset().top
matches.push(elem);
if (Math.abs(temp - START) < 25) { y = STOP; break}
} catch(error) {}
}
}
var miny = Math.abs(matches[0].offset().top - START), min_elem = matches[0];
for (i = 1; i < matches.length; i++) {
elem = matches[i];
temp = Math.abs(elem.offset().top - START);
if ( temp < miny ) { miny = temp; min_elem = elem; }
}
return min_elem;
}
function calculate_bookmark(y) {
var elem = find_closest_enclosing_block(y);
function calculate_bookmark(y, node) {
var elem = $(node);
var sel = selector(elem);
var ratio = (y - elem.offset().top)/elem.height();
if (ratio > 1) { ratio = 1; }

View File

@ -399,7 +399,7 @@ class BuildPDF2XML(Command):
objects.append(obj)
if self.newer(dest, objects):
cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
'-lpng', '-lpthread']
if iswindows:
cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()

View File

@ -137,8 +137,20 @@ class Develop(Command):
self.setup_mount_helper()
self.install_files()
self.run_postinstall()
self.install_env_module()
self.success()
def install_env_module(self):
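# Write an init_calibre.py module into the staging site-packages so that
# external Python code can set up and import calibre from a source install.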
import distutils.sysconfig as s
libdir = s.get_python_lib(prefix=self.opts.staging_root)
if os.path.exists(libdir):
path = os.path.join(libdir, 'init_calibre.py')
self.info('Installing calibre environment module: '+path)
with open(path, 'wb') as f:
f.write(HEADER.format(**self.template_args()))
else:
self.warn('Cannot install calibre environment module to: '+libdir)
def setup_mount_helper(self):
def warn():
self.warn('Failed to compile mount helper. Auto mounting of',
@ -180,13 +192,20 @@ class Develop(Command):
functions[typ]):
self.write_template(name, mod, func)
def template_args(self):
return {
'path':self.libdir,
'resources':self.sharedir,
'executables':self.bindir,
'extensions':self.j(self.libdir, 'calibre', 'plugins')
}
def write_template(self, name, mod, func):
template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
script = template.format(
module=mod, func=func,
path=self.libdir, resources=self.sharedir,
executables=self.bindir,
extensions=self.j(self.libdir, 'calibre', 'plugins'))
args = self.template_args()
args['module'] = mod
args['func'] = func
script = template.format(**args)
path = self.j(self.staging_bindir, name)
if not os.path.exists(self.staging_bindir):
os.makedirs(self.staging_bindir)

View File

@ -15,7 +15,7 @@ class Rsync(Command):
description = 'Sync source tree from development machine'
SYNC_CMD = ('rsync -avz --exclude src/calibre/plugins '
SYNC_CMD = ('rsync -avz --delete --exclude src/calibre/plugins '
'--exclude src/calibre/manual --exclude src/calibre/trac '
'--exclude .bzr --exclude .build --exclude .svn --exclude build --exclude dist '
'--exclude "*.pyc" --exclude "*.pyo" --exclude "*.swp" --exclude "*.swo" '

View File

@ -48,7 +48,9 @@ class Resources(Command):
dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
if self.newer(dest, files):
self.info('\tCreating builtin_recipes.xml')
open(dest, 'wb').write(serialize_builtin_recipes())
xml = serialize_builtin_recipes()
with open(dest, 'wb') as f:
f.write(xml)
dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
files = []

View File

@ -378,10 +378,11 @@ def strftime(fmt, t=None):
t = time.localtime()
early_year = t[0] < 1900
if early_year:
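# strftime cannot handle years before 1900, so format with a stand-in
# year that has the same value mod 4 (a leap-year approximation); the
# real year is spliced back in afterwards.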
replacement = 1900 if t[0]%4 == 0 else 1901
fmt = fmt.replace('%Y', '_early year hack##')
t = list(t)
orig_year = t[0]
t[0] = 1900
t[0] = replacement
ans = None
if iswindows:
if isinstance(fmt, unicode):

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.36'
__version__ = '0.6.40'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -7,6 +7,7 @@ import os
import glob
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract
class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP'
@ -416,9 +417,10 @@ from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX
from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
BOOQ
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
@ -430,11 +432,11 @@ from calibre.devices.eslick.driver import ESLICK
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516
from calibre.devices.hanvon.driver import N516, EB511
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
plugins += [
ComicInput,
EPUBInput,
@ -477,6 +479,7 @@ plugins += [
CYBOOK,
ILIAD,
IREXDR1000,
IREXDR800,
JETBOOK,
SHINEBOOK,
POCKETBOOK360,
@ -500,9 +503,11 @@ plugins += [
DBOOK,
INVESBOOK,
BOOX,
BOOQ,
EB600,
README,
N516,
EB511,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]

View File

@ -20,7 +20,7 @@ class ANDROID(USBMS):
VENDOR_ID = {
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
0x22b8 : { 0x41d9 : [0x216]},
0x18d1 : { 0x4e11 : [0x0100]},
0x18d1 : { 0x4e11 : [0x0100], 0x4e12: [0x0100]},
}
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '

View File

@ -184,3 +184,14 @@ class INVESBOOK(EB600):
VENDOR_NAME = 'INVES_E6'
WINDOWS_MAIN_MEM = '00INVES_E600'
WINDOWS_CARD_A_MEM = '00INVES_E600'
class BOOQ(EB600):
name = 'Booq Device Interface'
gui_name = 'Booq'
FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'pdf', 'doc', 'rtf', 'txt', 'html']
VENDOR_NAME = 'NETRONIX'
WINDOWS_MAIN_MEM = 'EB600'
WINDOWS_CARD_A_MEM = 'EB600'

View File

@ -126,3 +126,15 @@ class BOOX(HANLINV3):
EBOOK_DIR_MAIN = 'MyBooks'
EBOOK_DIR_CARD_A = 'MyBooks'
def windows_sort_drives(self, drives):
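# If Windows enumerates the card reader before the main memory, swap the
# two so 'main' always refers to the first drive.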
main = drives.get('main', None)
card = drives.get('carda', None)
if card and main and card < main:
drives['main'] = card
drives['carda'] = main
return drives

View File

@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Hanvon devices
'''
import re
from calibre.devices.usbms.driver import USBMS
@ -32,3 +33,25 @@ class N516(USBMS):
EBOOK_DIR_MAIN = 'e_book'
SUPPORTS_SUB_DIRS = True
class EB511(USBMS):
name = 'Elonex EB 511 driver'
gui_name = 'EB 511'
description = _('Communicate with the Elonex EB 511 eBook reader.')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = ['epub', 'html', 'pdf', 'txt']
VENDOR_ID = [0x45e]
PRODUCT_ID = [0xffff]
BCD = [0x0]
MAIN_MEMORY_VOLUME_LABEL = 'EB 511 Internal Memory'
EBOOK_DIR_MAIN = 'e_book'
SUPPORTS_SUB_DIRS = True
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')

View File

@ -36,3 +36,14 @@ class IREXDR1000(USBMS):
EBOOK_DIR_MAIN = 'ebooks'
DELETE_EXTS = ['.mbp']
SUPPORTS_SUB_DIRS = True
class IREXDR800(IREXDR1000):
name = 'IRex Digital Reader 800 Device Interface'
description = _('Communicate with the IRex Digital Reader 800')
PRODUCT_ID = [0x002]
WINDOWS_MAIN_MEM = 'DR800'
FORMATS = ['epub', 'html', 'pdf', 'txt']
EBOOK_DIR_MAIN = 'Books'
DELETE_EXTS = []

View File

@ -192,17 +192,15 @@ class PRS505(CLI, Device):
fix_ids(*booklists)
if not os.path.exists(self._main_prefix):
os.makedirs(self._main_prefix)
f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
booklists[0].write(f)
f.close()
with open(self._main_prefix + self.__class__.MEDIA_XML, 'wb') as f:
booklists[0].write(f)
def write_card_prefix(prefix, listid):
if prefix is not None and hasattr(booklists[listid], 'write'):
if not os.path.exists(prefix):
os.makedirs(prefix)
f = open(prefix + self.__class__.CACHE_XML, 'wb')
booklists[listid].write(f)
f.close()
with open(prefix + self.__class__.CACHE_XML, 'wb') as f:
booklists[listid].write(f)
write_card_prefix(self._card_a_prefix, 1)
write_card_prefix(self._card_b_prefix, 2)

View File

@ -70,6 +70,19 @@ def extract_cover_from_embedded_svg(html, base, log):
if href and os.access(path, os.R_OK):
return open(path, 'rb').read()
def extract_calibre_cover(raw, base, log):
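# A cover page generated by calibre contains no text elements, just a
# single <img alt="cover">; if that is what this is, return the image data.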
from calibre.ebooks.BeautifulSoup import BeautifulSoup
soup = BeautifulSoup(raw)
matches = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
'font', 'br'])
images = soup.findAll('img')
if matches is None and len(images) == 1 and \
images[0].get('alt', '')=='cover':
img = images[0]
img = os.path.join(base, *img['src'].split('/'))
if os.path.exists(img):
return open(img, 'rb').read()
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
from calibre.ebooks.oeb.base import SVG_NS
raw = open(path_to_html, 'rb').read()
@ -80,6 +93,11 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
os.path.dirname(path_to_html), log)
except:
pass
if data is None:
try:
data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
except:
pass
if data is None:
renderer = render_html(path_to_html, width, height)
data = getattr(renderer, 'data', None)

View File

@ -12,6 +12,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
run_plugins_on_preprocess, run_plugins_on_postprocess
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.date import parse_date
from calibre import extract, walk
DEBUG_README=u'''
@ -65,7 +66,7 @@ class Plumber(object):
metadata_option_names = [
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
'publisher', 'series', 'series_index', 'rating', 'isbn',
'tags', 'book_producer', 'language'
'tags', 'book_producer', 'language', 'pubdate', 'timestamp'
]
def __init__(self, input, output, log, report_progress=DummyReporter(),
@ -461,6 +462,14 @@ OptionRecommendation(name='language',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the language.')),
OptionRecommendation(name='pubdate',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the publication date.')),
OptionRecommendation(name='timestamp',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the book timestamp (used by the date column in calibre).')),
]
input_fmt = os.path.splitext(self.input)[1]
@ -619,6 +628,14 @@ OptionRecommendation(name='language',
except ValueError:
self.log.warn(_('Values of series index and rating must'
' be numbers. Ignoring'), val)
continue
elif x in ('timestamp', 'pubdate'):
try:
val = parse_date(val, assume_utc=x=='pubdate')
except:
self.log.exception(_('Failed to parse date/time') + ' ' +
unicode(val))
continue
setattr(mi, x, val)

View File

@ -132,6 +132,8 @@ class EPUBInput(InputFormatPlugin):
self.rationalize_cover(opf, log)
self.optimize_opf_parsing = opf
with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())

View File

@ -256,7 +256,20 @@ class EPUBOutput(OutputFormatPlugin):
Perform various markup transforms to get the output to render correctly
in the quirky ADE.
'''
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote
# ADE cries big wet tears when it encounters an invalid fragment
# identifier in the NCX toc.
frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
for node in self.oeb.toc.iter():
href = getattr(node, 'href', None)
if hasattr(href, 'partition'):
base, _, frag = href.partition('#')
frag = urlunquote(frag)
if frag and frag_pat.match(frag) is None:
self.log.warn(
'Removing invalid fragment identifier %r from TOC'%frag)
node.href = base
for x in self.oeb.spine:
root = x.data

View File

@ -111,7 +111,7 @@ class HTMLFile(object):
raise IOError(msg)
raise IgnoreFile(msg, err.errno)
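# Only linked-to files (level > 0) may be flagged as binary; the root
# file itself is always parsed as HTML.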
self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
if not self.is_binary:
if encoding is None:
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
@ -408,7 +408,10 @@ class HTMLInput(InputFormatPlugin):
return link_
if base and not os.path.isabs(link):
link = os.path.join(base, link)
link = os.path.abspath(link)
try:
link = os.path.abspath(link)
except:
return link_
if not os.access(link, os.R_OK):
return link_
if os.path.isdir(link):

View File

@ -50,6 +50,7 @@ from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
STREAM_FORCE_COMPRESSED)
from calibre.utils.date import isoformat
DEFAULT_SOURCE_ENCODING = "cp1252" # default is us-windows character set
DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
@ -852,7 +853,7 @@ class DocInfo(object):
self.thumbnail = None
self.language = "en"
self.creator = None
self.creationdate = date.today().isoformat()
self.creationdate = str(isoformat(date.today()))
self.producer = "%s v%s"%(__appname__, __version__)
self.numberofpages = "0"

View File

@ -10,9 +10,11 @@ import os, mimetypes, sys, re
from urllib import unquote, quote
from urlparse import urlparse
from calibre import relpath
from calibre.utils.config import tweaks
from calibre.utils.date import isoformat
_author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
def string_to_authors(raw):
raw = raw.replace('&&', u'\uffff')
@ -27,6 +29,9 @@ def authors_to_string(authors):
return ''
def author_to_author_sort(author):
method = tweaks['author_sort_copy_method']
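# 'copy' keeps the author string unchanged, as does 'comma' when the name
# already contains a comma (assumed to be pre-sorted); otherwise the last
# name is moved to the front below.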
if method == 'copy' or (method == 'comma' and author.count(',') > 0):
return author
tokens = author.split()
tokens = tokens[-1:] + tokens[:-1]
if len(tokens) > 1:
@ -340,9 +345,9 @@ class MetaInformation(object):
if self.rating is not None:
fmt('Rating', self.rating)
if self.timestamp is not None:
fmt('Timestamp', self.timestamp.isoformat(' '))
fmt('Timestamp', isoformat(self.timestamp))
if self.pubdate is not None:
fmt('Published', self.pubdate.isoformat(' '))
fmt('Published', isoformat(self.pubdate))
if self.rights is not None:
fmt('Rights', unicode(self.rights))
if self.lccn:

View File

@ -7,12 +7,11 @@ __docformat__ = 'restructuredtext en'
Fetch metadata using Amazon AWS
'''
import sys, re
from datetime import datetime
from lxml import etree
from dateutil import parser
from calibre import browser
from calibre.utils.date import parse_date, utcnow
from calibre.ebooks.metadata import MetaInformation, string_to_authors
AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
@ -44,9 +43,8 @@ def get_social_metadata(title, authors, publisher, isbn):
try:
d = root.findtext('.//'+AWS('PublicationDate'))
if d:
default = datetime.utcnow()
default = datetime(default.year, default.month, 15)
d = parser.parse(d[0].text, default=default)
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
mi.pubdate = d
except:
pass

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from contextlib import closing
from calibre.customize import FileTypePlugin
def is_comic(list_of_names):
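# An archive is taken to be a comic when every member shares a single
# image extension (jpg/jpeg/png).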
extensions = set([x.rpartition('.')[-1].lower() for x in list_of_names])
return len(extensions) == 1 and iter(extensions).next() in ('jpg', 'jpeg', 'png')
class ArchiveExtract(FileTypePlugin):
name = 'Archive Extract'
author = 'Kovid Goyal'
description = _('Extract common e-book formats from archive '
'(zip/rar) files. Also try to autodetect if they are actually '
'cbz/cbr files.')
file_types = set(['zip', 'rar'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def run(self, archive):
is_rar = archive.lower().endswith('.rar')
if is_rar:
from calibre.libunrar import extract_member, names
else:
from calibre.utils.zipfile import ZipFile
zf = ZipFile(archive, 'r')
if is_rar:
fnames = names(archive)
else:
fnames = zf.namelist()
fnames = [x for x in fnames if '.' in x]
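# A pure-image archive is really a comic: copy it unchanged under a
# .cbz/.cbr name so it is imported as a comic.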
if is_comic(fnames):
ext = '.cbr' if is_rar else '.cbz'
of = self.temporary_file('_archive_extract'+ext)
with open(archive, 'rb') as f:
of.write(f.read())
of.close()
return of.name
if len(fnames) > 1 or not fnames:
return archive
fname = fnames[0]
ext = os.path.splitext(fname)[1][1:]
if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
'mp3', 'pdb', 'azw', 'azw1'):
return archive
of = self.temporary_file('_archive_extract.'+ext)
with closing(of):
if is_rar:
data = extract_member(archive, match=None, name=fname)[1]
of.write(data)
else:
of.write(zf.read(fname))
return of.name

View File

@ -15,6 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
title_sort, MetaInformation
from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre import prints
from calibre.utils.date import parse_date
USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
_('''
@ -69,6 +70,8 @@ def config():
help=_('Set the book producer.'))
c.add_opt('language', ['-l', '--language'],
help=_('Set the language.'))
c.add_opt('pubdate', ['-d', '--date'],
help=_('Set the published date.'))
c.add_opt('get_cover', ['--get-cover'],
help=_('Get the cover from the ebook and save it at as the '
@ -132,6 +135,8 @@ def do_set_metadata(opts, mi, stream, stream_type):
mi.series = opts.series.strip()
if getattr(opts, 'series_index', None) is not None:
mi.series_index = float(opts.series_index.strip())
if getattr(opts, 'pubdate', None) is not None:
mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
if getattr(opts, 'cover', None) is not None:
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files'''
import os
import os, re
from cStringIO import StringIO
from contextlib import closing
@ -29,15 +29,15 @@ class Container(dict):
def __init__(self, stream=None):
if not stream: return
soup = BeautifulStoneSoup(stream.read())
container = soup.find('container')
container = soup.find(name=re.compile(r'container$', re.I))
if not container:
raise OCFException("<container/> element missing")
raise OCFException("<container> element missing")
if container.get('version', None) != '1.0':
raise EPubException("unsupported version of OCF")
rootfiles = container.find('rootfiles')
rootfiles = container.find(re.compile(r'rootfiles$', re.I))
if not rootfiles:
raise EPubException("<rootfiles/> element missing")
for rootfile in rootfiles.findAll('rootfile'):
for rootfile in rootfiles.findAll(re.compile(r'rootfile$', re.I)):
try:
self[rootfile['media-type']] = rootfile['full-path']
except KeyError:
@ -69,7 +69,7 @@ class OCFReader(OCF):
self.opf_path = self.container[OPF.MIMETYPE]
try:
with closing(self.open(self.opf_path)) as f:
self.opf = OPF(f, self.root)
self.opf = OPF(f, self.root, populate_spine=False)
except KeyError:
raise EPubException("missing OPF package file")
@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
def get_cover(opf, opf_path, stream):
from calibre.ebooks import render_html_svg_workaround
from calibre.utils.logging import default_log
spine = list(opf.spine_items())
if not spine:
cpage = opf.first_spine_item()
if not cpage:
return
cpage = spine[0]
with TemporaryDirectory('_epub_meta') as tdir:
with CurrentDir(tdir):
stream.seek(0)

View File

@ -6,14 +6,13 @@ __docformat__ = 'restructuredtext en'
import sys, textwrap
from urllib import urlencode
from functools import partial
from datetime import datetime
from lxml import etree
from dateutil import parser
from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
@ -156,9 +155,8 @@ class ResultList(list):
try:
d = date(entry)
if d:
default = datetime.utcnow()
default = datetime(default.year, default.month, 15)
d = parser.parse(d[0].text, default=default)
default = utcnow().replace(day=15)
d = parse_date(d[0].text, assume_utc=True, default=default)
else:
d = None
except:

View File

@ -65,6 +65,10 @@ def _metadata_from_formats(formats):
return mi
def is_recipe(filename):
return filename.startswith('calibre') and \
filename.rpartition('.')[0].endswith('_recipe_out')
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
if stream_type: stream_type = stream_type.lower()
if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
@ -84,11 +88,10 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
return opf
mi = MetaInformation(None, None)
if prefs['read_file_metadata']:
mi = get_file_type_metadata(stream, stream_type)
name = os.path.basename(getattr(stream, 'name', ''))
base = metadata_from_filename(name)
if is_recipe(name) or prefs['read_file_metadata']:
mi = get_file_type_metadata(stream, stream_type)
if base.title == os.path.splitext(name)[0] and base.authors is None:
# Assume that there was no metadata in the file and the user set pattern
# to match meta info from the file name did not match.

View File

@ -11,13 +11,11 @@ __docformat__ = 'restructuredtext en'
from struct import pack, unpack
from cStringIO import StringIO
from datetime import datetime
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi
import struct
from calibre.utils.date import now as nowf
class StreamSlicer(object):
@ -105,11 +103,12 @@ class MetadataUpdater(object):
have_exth = self.have_exth = (flags & 0x40) != 0
self.cover_record = self.thumbnail_record = None
self.timestamp = None
self.pdbrecords = self.get_pdbrecords()
self.original_exth_records = {}
if not have_exth:
self.create_exth()
self.have_exth = True
# Fetch timestamp, cover_record, thumbnail_record
self.fetchEXTHFields()
@ -131,14 +130,18 @@ class MetadataUpdater(object):
content = exth[pos + 8: pos + size]
pos += size
self.original_exth_records[id] = content
if id == 106:
self.timestamp = content
elif id == 201:
rindex, = self.cover_rindex, = unpack('>I', content)
self.cover_record = self.record(rindex + image_base)
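# Unpack the index as a signed int: a negative value (0xffffffff in the
# file) indicates that no cover record is present.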
rindex, = self.cover_rindex, = unpack('>i', content)
if rindex > 0 :
self.cover_record = self.record(rindex + image_base)
elif id == 202:
rindex, = self.thumbnail_rindex, = unpack('>I', content)
self.thumbnail_record = self.record(rindex + image_base)
rindex, = self.thumbnail_rindex, = unpack('>i', content)
if rindex > 0 :
self.thumbnail_record = self.record(rindex + image_base)
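
Both record indexes are now read as signed integers with a positivity check, so an all-0xFF EXTH value is seen as the -1 "no record" sentinel rather than a huge unsigned index for record() to chase. The difference in miniature:

    from struct import unpack

    content = b'\xff\xff\xff\xff'
    print(unpack('>I', content)[0])  # 4294967295 -- looks like a record index
    print(unpack('>i', content)[0])  # -1         -- recognizably "no record"
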
def patch(self, off, new_record0):
# Save the current size of each record
@ -181,14 +184,14 @@ class MetadataUpdater(object):
off = self.pdbrecords[section][0]
self.patch(off, new)
def create_exth(self, exth=None):
def create_exth(self, new_title=None, exth=None):
# Add an EXTH block to record 0, rewrite the stream
# self.hexdump(self.record0)
# Fetch the title
title_offset, = struct.unpack('>L', self.record0[0x54:0x58])
title_length, = struct.unpack('>L', self.record0[0x58:0x5c])
title_in_file, = struct.unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
# Fetch the existing title
title_offset, = unpack('>L', self.record0[0x54:0x58])
title_length, = unpack('>L', self.record0[0x58:0x5c])
title_in_file, = unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
# Adjust length to accommodate PrimaryINDX if necessary
mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
@ -207,14 +210,21 @@ class MetadataUpdater(object):
exth = ['EXTH', pack('>II', 12, 0), pad]
exth = ''.join(exth)
# Update title_offset
# Update title_offset, title_len if new_title
self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
if new_title:
self.record0[0x58:0x5c] = pack('>L', len(new_title))
# Create an updated Record0
new_record0 = StringIO()
new_record0.write(self.record0[:0x10 + mobi_header_length])
new_record0.write(exth)
new_record0.write(title_in_file)
if new_title:
#new_record0.write(new_title.encode(self.codec, 'replace'))
new_title = (new_title or _('Unknown')).encode(self.codec, 'replace')
new_record0.write(new_title)
else:
new_record0.write(title_in_file)
# Pad to a 4-byte boundary
trail = len(new_record0.getvalue()) % 4
@ -244,7 +254,7 @@ class MetadataUpdater(object):
def get_pdbrecords(self):
pdbrecords = []
for i in xrange(self.nrecs):
offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
offset, a1,a2,a3,a4 = unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
flags, val = a1, a2<<16|a3<<8|a4
pdbrecords.append( [offset, flags, val] )
return pdbrecords
@ -275,6 +285,10 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop)
def update(self, mi):
def pop_exth_record(exth_id):
if exth_id in self.original_exth_records:
self.original_exth_records.pop(exth_id)
if self.type != "BOOKMOBI":
raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
"\tThis is a '%s' file of type '%s'" % (self.type[0:4], self.type[4:8]))
@ -289,35 +303,53 @@ class MetadataUpdater(object):
if mi.author_sort and pas:
authors = mi.author_sort
recs.append((100, authors.encode(self.codec, 'replace')))
pop_exth_record(100)
elif mi.authors:
authors = '; '.join(mi.authors)
recs.append((100, authors.encode(self.codec, 'replace')))
pop_exth_record(100)
if mi.publisher:
recs.append((101, mi.publisher.encode(self.codec, 'replace')))
pop_exth_record(101)
if mi.comments:
recs.append((103, mi.comments.encode(self.codec, 'replace')))
pop_exth_record(103)
if mi.isbn:
recs.append((104, mi.isbn.encode(self.codec, 'replace')))
pop_exth_record(104)
if mi.tags:
subjects = '; '.join(mi.tags)
recs.append((105, subjects.encode(self.codec, 'replace')))
pop_exth_record(105)
if mi.pubdate:
recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
pop_exth_record(106)
elif mi.timestamp:
recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
pop_exth_record(106)
elif self.timestamp:
recs.append((106, self.timestamp))
pop_exth_record(106)
else:
recs.append((106, str(datetime.now()).encode(self.codec, 'replace')))
recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
pop_exth_record(106)
if self.cover_record is not None:
recs.append((201, pack('>I', self.cover_rindex)))
recs.append((203, pack('>I', 0)))
pop_exth_record(201)
pop_exth_record(203)
if self.thumbnail_record is not None:
recs.append((202, pack('>I', self.thumbnail_rindex)))
pop_exth_record(202)
if getattr(self, 'encryption_type', -1) != 0:
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
# Restore any original EXTH fields that weren't modified/updated
for id in sorted(self.original_exth_records):
recs.append((id, self.original_exth_records[id]))
recs = sorted(recs, key=lambda x:(x[0],x[0]))
exth = StringIO()
for code, data in recs:
exth.write(pack('>II', code, len(data) + 8))
@ -332,7 +364,7 @@ class MetadataUpdater(object):
raise MobiError('No existing EXTH record. Cannot update metadata.')
self.record0[92:96] = iana2mobi(mi.language)
self.create_exth(exth)
self.create_exth(exth=exth, new_title=mi.title)
# Fetch updated timestamp, cover_record, thumbnail_record
self.fetchEXTHFields()
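
Taken together, update() now pops every EXTH id it rewrites from original_exth_records and appends whatever remains, so EXTH fields calibre does not manage survive a metadata update. For orientation, a sketch of how the collected (id, data) pairs become an EXTH block, following the pack('>II', code, len(data) + 8) record layout used above; the header layout is a summary from the MOBI format, not code in this diff:

    from struct import pack

    def build_exth(recs):
        body = b''.join(pack('>II', code, len(data) + 8) + data
                        for code, data in recs)
        # 'EXTH' magic, total length, record count, then the records
        exth = pack('>4sII', b'EXTH', len(body) + 12, len(recs)) + body
        return exth + b'\x00' * ((4 - len(exth) % 4) % 4)  # 4-byte padding

    print(repr(build_exth([(100, b'Some Author')])[:12]))  # 'EXTH' header
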

View File

@ -12,12 +12,12 @@ from urllib import unquote
from urlparse import urlparse
from lxml import etree
from dateutil import parser
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import __appname__, __version__, filesystem_encoding
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre.utils.date import parse_date, isoformat
class Resource(object):
@ -272,6 +272,10 @@ class Spine(ResourceCollection):
self.id = idfunc(self.path)
self.idref = None
def __repr__(self):
return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % \
(self.path, self.id, self.is_linear)
@staticmethod
def from_opf_spine_element(itemrefs, manifest):
s = Spine(manifest)
@ -280,7 +284,7 @@ class Spine(ResourceCollection):
if idref is not None:
path = s.manifest.path_for_id(idref)
if path:
r = Spine.Item(s.manifest.id_for_path, path, is_path=True)
r = Spine.Item(lambda x:idref, path, is_path=True)
r.is_linear = itemref.get('linear', 'yes') == 'yes'
r.idref = idref
s.append(r)
@ -441,6 +445,8 @@ class OPF(object):
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
title = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
title_sort = MetadataField('title_sort', formatter=lambda x:
re.sub(r'\s+', ' ', x), is_dc=False)
publisher = MetadataField('publisher')
language = MetadataField('language')
comments = MetadataField('description')
@ -449,12 +455,14 @@ class OPF(object):
series = MetadataField('series', is_dc=False)
series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
rating = MetadataField('rating', is_dc=False, formatter=int)
pubdate = MetadataField('date', formatter=parser.parse)
pubdate = MetadataField('date', formatter=parse_date)
publication_type = MetadataField('publication_type', is_dc=False)
timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
timestamp = MetadataField('timestamp', is_dc=False,
formatter=parse_date)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
populate_spine=True):
if not hasattr(stream, 'read'):
stream = open(stream, 'rb')
raw = stream.read()
@ -477,7 +485,7 @@ class OPF(object):
self.manifest = Manifest.from_opf_manifest_element(m, basedir)
self.spine = None
s = self.spine_path(self.root)
if s:
if populate_spine and s:
self.spine = Spine.from_opf_spine_element(s, self.manifest)
self.guide = None
guide = self.guide_path(self.root)
@ -584,6 +592,15 @@ class OPF(object):
if x.get('id', None) == idref:
yield x.get('href', '')
def first_spine_item(self):
items = self.iterspine()
if not items:
return None
idref = items[0].get('idref', '')
for x in self.itermanifest():
if x.get('id', None) == idref:
return x.get('href', None)
def create_spine_item(self, idref):
ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
ans.tail = '\n\t\t'
@ -675,29 +692,6 @@ class OPF(object):
return property(fget=fget, fset=fset)
@dynamic_property
def title_sort(self):
def fget(self):
matches = self.title_path(self.metadata)
if matches:
for match in matches:
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
if not ans:
ans = match.get('file-as', None)
if ans:
return ans
def fset(self, val):
matches = self.title_path(self.metadata)
if matches:
for key in matches[0].attrib:
if key.endswith('file-as'):
matches[0].attrib.pop(key)
matches[0].set('file-as', unicode(val))
return property(fget=fget, fset=fset)
@dynamic_property
def tags(self):
@ -869,7 +863,8 @@ class OPF(object):
def smart_update(self, mi):
for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'language', 'tags', 'category', 'comments'):
'isbn', 'language', 'tags', 'category', 'comments',
'pubdate'):
val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None):
setattr(self, attr, val)
@ -1041,12 +1036,12 @@ def metadata_to_opf(mi, as_string=True):
elem.text = text.strip()
metadata.append(elem)
factory(DC('title'), mi.title, mi.title_sort)
factory(DC('title'), mi.title)
for au in mi.authors:
factory(DC('creator'), au, mi.author_sort, 'aut')
factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
if hasattr(mi.pubdate, 'isoformat'):
factory(DC('date'), mi.pubdate.isoformat())
factory(DC('date'), isoformat(mi.pubdate))
factory(DC('language'), mi.language)
if mi.category:
factory(DC('type'), mi.category)
@ -1069,9 +1064,11 @@ def metadata_to_opf(mi, as_string=True):
if mi.rating is not None:
meta('rating', str(mi.rating))
if hasattr(mi.timestamp, 'isoformat'):
meta('timestamp', mi.timestamp.isoformat())
meta('timestamp', isoformat(mi.timestamp))
if mi.publication_type:
meta('publication_type', mi.publication_type)
if mi.title_sort:
meta('title_sort', mi.title_sort)
metadata[-1].tail = '\n' +(' '*4)
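
Net effect of the two metadata_to_opf hunks above: title_sort no longer rides on dc:title as a file-as attribute (the old accessor is deleted below) and is written as its own meta element instead. Serialized, that looks roughly like the following; the calibre:title_sort name follows calibre's usual meta-name convention and is a reconstruction, not text from this diff:

    <dc:title>test &amp; title</dc:title>
    <meta name="calibre:title_sort" content="a'&quot;b"/>
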
@ -1088,12 +1085,12 @@ def metadata_to_opf(mi, as_string=True):
def test_m2o():
from datetime import datetime
from calibre.utils.date import now as nowf
from cStringIO import StringIO
mi = MetaInformation('test & title', ['a"1', "a'2"])
mi.title_sort = 'a\'"b'
mi.author_sort = 'author sort'
mi.pubdate = datetime.now()
mi.pubdate = nowf()
mi.language = 'en'
mi.category = 'test'
mi.comments = 'what a fun book\n\n'
@ -1103,7 +1100,7 @@ def test_m2o():
mi.series = 's"c\'l&<>'
mi.series_index = 3.34
mi.rating = 3
mi.timestamp = datetime.now()
mi.timestamp = nowf()
mi.publication_type = 'ooooo'
mi.rights = 'yes'
mi.cover = 'asd.jpg'

View File

@ -13,6 +13,9 @@ from calibre.ptempfile import PersistentTemporaryFile
from calibre.libunrar import extract_member, names
def get_metadata(stream):
from calibre.ebooks.metadata.archive import is_comic
from calibre.ebooks.metadata.meta import get_metadata
path = getattr(stream, 'name', False)
if not path:
pt = PersistentTemporaryFile('_rar-meta.rar')
@ -21,6 +24,8 @@ def get_metadata(stream):
path = pt.name
path = os.path.abspath(path)
file_names = list(names(path))
if is_comic(file_names):
return get_metadata(stream, 'cbr')
for f in file_names:
stream_type = os.path.splitext(f)[1].lower()
if stream_type:
@ -29,7 +34,6 @@ def get_metadata(stream):
'rb', 'imp', 'pdf', 'lrf'):
data = extract_member(path, match=None, name=f)[1]
stream = StringIO(data)
from calibre.ebooks.metadata.meta import get_metadata
return get_metadata(stream, stream_type)
raise ValueError('No ebook found in RAR archive')
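
Comic detection now happens before the per-file metadata probe. is_comic() lives in calibre.ebooks.metadata.archive and is not shown in this diff; a hypothetical sketch of such a check over archive member names:

    import os

    def looks_like_comic(file_names):  # hypothetical stand-in for is_comic()
        exts = [os.path.splitext(f)[1].lower()
                for f in file_names if not f.endswith('/')]
        if not exts:
            return False
        images = sum(1 for e in exts if e in ('.png', '.jpg', '.jpeg', '.gif'))
        return images / float(len(exts)) > 0.8  # mostly images => comic

    print(looks_like_comic(['p001.jpg', 'p002.jpg', 'p003.jpg']))  # True
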

View File

@ -8,15 +8,21 @@ from cStringIO import StringIO
def get_metadata(stream):
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.archive import is_comic
stream_type = None
zf = ZipFile(stream, 'r')
for f in zf.namelist():
names = zf.namelist()
if is_comic(names):
# Is probably a comic
return get_metadata(stream, 'cbz')
for f in names:
stream_type = os.path.splitext(f)[1].lower()
if stream_type:
stream_type = stream_type[1:]
if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
'rb', 'imp', 'pdf', 'lrf'):
from calibre.ebooks.metadata.meta import get_metadata
stream = StringIO(zf.read(f))
return get_metadata(stream, stream_type)
raise ValueError('No ebook found in ZIP archive')

View File

@ -4,13 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files
'''
import datetime
import functools
import os
import re
import struct
import textwrap
import cStringIO
try:
@ -23,6 +21,7 @@ from lxml import html, etree
from calibre import entity_to_unicode, CurrentDir
from calibre.utils.filenames import ascii_filename
from calibre.utils.date import parse_date
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS
@ -68,7 +67,10 @@ class EXTHHeader(object):
pass
elif id == 503: # Long title
if not title or title == _('Unknown'):
title = content
try:
title = content.decode(codec)
except:
pass
#else:
# print 'unknown record', id, repr(content)
if title:
@ -96,8 +98,7 @@ class EXTHHeader(object):
self.mi.tags = list(set(self.mi.tags))
elif id == 106:
try:
self.mi.publish_date = datetime.datetime.strptime(
content, '%Y-%m-%d', ).date()
self.mi.pubdate = parse_date(content, as_utc=False)
except:
pass
elif id == 108:

View File

@ -310,6 +310,7 @@ class Serializer(object):
text = text.replace('&', '&amp;')
text = text.replace('<', '&lt;')
text = text.replace('>', '&gt;')
text = text.replace(u'\u00AD', '') # Soft-hyphen
if quot:
text = text.replace('"', '&quot;')
self.buffer.write(encode(text))
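
The serializer now drops soft hyphens before entity-escaping; U+00AD tends to surface as a visible hyphen on some MOBI renderers (the motivation is an inference, the diff only shows the strip). The resulting escape path as one self-contained function:

    def escape_text(text, quot=True):
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        text = text.replace(u'\u00AD', '')  # soft hyphen
        if quot:
            text = text.replace('"', '&quot;')
        return text

    print(escape_text(u'opti\u00ADmal & <b>'))  # optimal &amp; &lt;b&gt;
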
@ -610,12 +611,21 @@ class MobiWriter(object):
if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
if offset != previousOffset + previousLength :
self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
(i-1, entries[i-1].title, previousOffset, previousLength) )
self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
(i, child.title, offset, previousOffset + previousLength) )
self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
# self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
# self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
# Dump the offending entry
self._oeb.log.info("...")
for z in range(i-6 if i-6 > 0 else 0, i+6 if i+6 < len(entries) else len(entries)):
if z == i:
self._oeb.log.warning("child %03d: %s" % (z, entries[z]))
else:
self._oeb.log.info("child %03d: %s" % (z, entries[z]))
self._oeb.log.info("...")
self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
# Zero out self._HTMLRecords, return False
self._HTMLRecords = []

View File

@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
from urlparse import urljoin
from lxml import etree, html
from cssutils import CSSParser
import calibre
from cssutils import CSSParser
from calibre.constants import filesystem_encoding
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -434,10 +435,18 @@ class DirContainer(object):
def namelist(self):
names = []
for root, dirs, files in os.walk(self.rootdir):
base = self.rootdir
if isinstance(base, unicode):
base = base.encode(filesystem_encoding)
for root, dirs, files in os.walk(base):
for fname in files:
fname = os.path.join(root, fname)
fname = fname.replace('\\', '/')
if not isinstance(fname, unicode):
try:
fname = fname.decode(filesystem_encoding)
except:
continue
names.append(fname)
return names
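
namelist() now walks a bytestring root (so Python 2's os.walk yields bytestrings) and skips names that will not decode in the filesystem encoding instead of crashing later. The decode-or-skip policy in isolation:

    import os, sys

    fs_enc = sys.getfilesystemencoding() or 'utf-8'

    def safe_names(rootdir):  # sketch; mirrors the loop above
        names = []
        for root, dirs, files in os.walk(rootdir):
            for fname in files:
                path = os.path.join(root, fname).replace('\\', '/')
                if not isinstance(path, unicode):  # Python 2 bytestring
                    try:
                        path = path.decode(fs_enc)
                    except UnicodeDecodeError:
                        continue  # undecodable name: skip, do not crash
                names.append(path)
        return names
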
@ -842,8 +851,10 @@ class Manifest(object):
self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
nroot = etree.fromstring('<html><body/></html>')
parent = nroot[0]
for child in list(data):
child.getparent().remove(child)
for child in list(data.iter()):
oparent = child.getparent()
if oparent is not None:
oparent.remove(child)
parent.append(child)
data = nroot
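
A sketch of the fragment rescue above, runnable on its own (lxml; the fragment is illustrative). Iterating data.iter() instead of only the top-level children also yields the fragment root itself, whose getparent() is None and so must not be removed:

    from lxml import etree

    data = etree.fromstring('<p>one</p>')  # a stray HTML fragment
    nroot = etree.fromstring('<html><body/></html>')
    parent = nroot[0]
    for child in list(data.iter()):
        oparent = child.getparent()
        if oparent is not None:  # skip the parentless fragment root
            oparent.remove(child)
        parent.append(child)
    print(etree.tostring(nroot))  # <html><body><p>one</p></body></html>
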
@ -1567,14 +1578,17 @@ class TOC(object):
parent = etree.Element(NCX('navMap'))
for node in self.nodes:
id = node.id or unicode(uuid.uuid4())
attrib = {'id': id, 'playOrder': str(node.play_order)}
po = node.play_order
if po == 0:
po = 1
attrib = {'id': id, 'playOrder': str(po)}
if node.klass:
attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib)
label = etree.SubElement(point, NCX('navLabel'))
title = node.title
if title:
title = re.sub(r'\s', ' ', title)
title = re.sub(r'\s+', ' ', title)
element(label, NCX('text')).text = title
element(point, NCX('content'), src=urlunquote(node.href))
node.to_ncx(point)
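
Two small robustness fixes here: a playOrder of 0 is bumped to 1 (play orders are meant to start at 1, and the bump presumably placates strict readers), and title whitespace is collapsed with \s+ rather than mapped character-by-character with \s. The regex difference:

    import re

    title = 'Chapter\n   One'
    print(repr(re.sub(r'\s', ' ', title)))   # 'Chapter    One' (run survives)
    print(repr(re.sub(r'\s+', ' ', title)))  # 'Chapter One'
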

View File

@ -120,7 +120,10 @@ class EbookIterator(object):
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)
@ -181,8 +184,9 @@ class EbookIterator(object):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language
if self.language:
self.language = self.language.lower()

View File

@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from datetime import datetime
from calibre.utils.date import isoformat, now
def meta_info_to_oeb_metadata(mi, m, log):
from calibre.ebooks.oeb.base import OPF
@ -60,10 +60,10 @@ def meta_info_to_oeb_metadata(mi, m, log):
m.add('subject', t)
if mi.pubdate is not None:
m.clear('date')
m.add('date', mi.pubdate.isoformat())
m.add('date', isoformat(mi.pubdate))
if mi.timestamp is not None:
m.clear('timestamp')
m.add('timestamp', mi.timestamp.isoformat())
m.add('timestamp', isoformat(mi.timestamp))
if mi.rights is not None:
m.clear('rights')
m.add('rights', mi.rights)
@ -71,7 +71,7 @@ def meta_info_to_oeb_metadata(mi, m, log):
m.clear('publication_type')
m.add('publication_type', mi.publication_type)
if not m.timestamp:
m.add('timestamp', datetime.now().isoformat())
m.add('timestamp', isoformat(now()))
class MergeMetadata(object):

View File

@ -35,7 +35,10 @@ class RescaleImages(object):
if not raw: continue
if qt:
img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
if not img.loadFromData(raw): continue
try:
if not img.loadFromData(raw): continue
except:
continue
width, height = img.width(), img.height()
else:
f = cStringIO.StringIO(raw)

View File

@ -42,9 +42,9 @@ class Writer(FormatWriter):
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
text, text_sizes = self._text(pml)
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4)="(?P<text>.+?)"', pml)
chapter_index += self.index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
chapter_index += self.index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
chapter_index += self._index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
chapter_index += self._index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
metadata = [self._metadata(metadata)]
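
The old first pattern had an unterminated character class ([0-4) instead of [0-4]), so it could not even compile, and the next two calls named a nonexistent index_item instead of _index_item. A compact before/after:

    import re

    try:
        re.compile(r'(?s)\\C(?P<val>[0-4)="(?P<text>.+?)"')
    except re.error as e:
        print('old pattern fails to compile: %s' % e)

    fixed = r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"'
    print(re.findall(fixed, r'\C0="Chapter One"'))  # [('0', 'Chapter One')]
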

View File

@ -169,6 +169,8 @@ int main(int argc, char **argv) {
char *memblock;
ifstream::pos_type size;
int ret = 0;
map<string,string> info;
Reflow *reflow = NULL;
if (argc != 2) {
@ -189,9 +191,13 @@ int main(int argc, char **argv) {
}
try {
Reflow reflow(memblock, size);
reflow.render();
vector<char> *data = reflow.render_first_page();
reflow = new Reflow(memblock, size);
info = reflow->get_info();
for (map<string,string>::const_iterator it = info.begin() ; it != info.end(); it++ ) {
cout << (*it).first << " : " << (*it).second << endl;
}
//reflow->render();
vector<char> *data = reflow->render_first_page();
ofstream file("cover.png", ios::binary);
file.write(&((*data)[0]), data->size());
delete data;
@ -200,7 +206,7 @@ int main(int argc, char **argv) {
cerr << e.what() << endl;
ret = 1;
}
delete reflow;
delete[] memblock;
return ret;
}

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
import sys, os
from lxml import etree
@ -47,6 +47,10 @@ class Image(Element):
return '<img src="%s" width="%dpx" height="%dpx"/>' % \
(self.src, int(self.width), int(self.height))
def dump(self, f):
f.write(self.to_html())
f.write('\n')
class Text(Element):
@ -91,6 +95,10 @@ class Text(Element):
def to_html(self):
return self.raw
def dump(self, f):
f.write(self.to_html().encode('utf-8'))
f.write('\n')
class FontSizeStats(dict):
def __init__(self, stats):
@ -143,6 +151,14 @@ class Column(object):
def add(self, elem):
if elem in self.elements: return
self.elements.append(elem)
self._post_add()
def prepend(self, elem):
if elem in self.elements: return
self.elements.insert(0, elem)
self._post_add()
def _post_add(self):
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
self.top = self.elements[0].top
self.bottom = self.elements[-1].bottom
@ -183,6 +199,11 @@ class Column(object):
return None
return self.elements[idx-1]
def dump(self, f, num):
f.write('******** Column %d\n\n'%num)
for elem in self.elements:
elem.dump(f)
class Box(list):
@ -262,7 +283,6 @@ class Region(object):
max_lines = max(max_lines, len(c))
return max_lines
@property
def is_small(self):
return self.line_count < 3
@ -283,7 +303,6 @@ class Region(object):
mc = self.columns[0]
return mc
print
for c in singleton.columns:
for elem in c:
col = most_suitable_column(elem)
@ -304,6 +323,51 @@ class Region(object):
for x in self.columns:
yield x
def absorb_regions(self, regions, at):
for region in regions:
self.absorb_region(region, at)
def absorb_region(self, region, at):
if len(region.columns) <= len(self.columns):
for i in range(len(region.columns)):
src, dest = region.columns[i], self.columns[i]
if at != 'bottom':
src = reversed(list(iter(src)))
for elem in src:
func = dest.add if at == 'bottom' else dest.prepend
func(elem)
else:
col_map = {}
for i, col in enumerate(region.columns):
max_overlap, max_overlap_index = 0, 0
for j, dcol in enumerate(self.columns):
sint = Interval(col.left, col.right)
dint = Interval(dcol.left, dcol.right)
width = sint.intersection(dint).width
if width > max_overlap:
max_overlap = width
max_overlap_index = j
col_map[i] = max_overlap_index
lines = max(map(len, region.columns))
if at == 'bottom':
lines = range(lines)
else:
lines = range(lines-1, -1, -1)
for i in lines:
for j, src in enumerate(region.columns):
dest = self.columns[col_map[j]]
if i < len(src):
func = dest.add if at == 'bottom' else dest.prepend
func(src.elements[i])
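
When the source region has more columns than the destination, each source column is mapped to the destination column it overlaps most horizontally. A self-contained sketch of that matching; Interval here is a stand-in for the helper the real code uses:

    class Interval(object):
        def __init__(self, left, right):
            self.left, self.right = left, right
            self.width = max(0, right - left)
        def intersection(self, other):
            return Interval(max(self.left, other.left),
                            min(self.right, other.right))

    def best_dest(src, dests):  # src, dests: (left, right) column extents
        sint = Interval(*src)
        widths = [sint.intersection(Interval(*d)).width for d in dests]
        return widths.index(max(widths))

    print(best_dest((10, 90), [(0, 30), (40, 100)]))  # 1
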
def dump(self, f):
f.write('############################################################\n')
f.write('########## Region (%d columns) ###############\n'%len(self.columns))
f.write('############################################################\n\n')
for i, col in enumerate(self.columns):
col.dump(f, i)
def linearize(self):
self.elements = []
for x in self.columns:
@ -376,7 +440,8 @@ class Page(object):
self.font_size_stats[t.font_size] = 0
self.font_size_stats[t.font_size] += len(t.text_as_string)
self.average_text_height += t.height
self.average_text_height /= len(self.texts)
if len(self.texts):
self.average_text_height /= len(self.texts)
self.font_size_stats = FontSizeStats(self.font_size_stats)
@ -431,31 +496,78 @@ class Page(object):
if not current_region.is_empty:
self.regions.append(current_region)
if self.opts.verbose > 2:
self.debug_dir = 'page-%d'%self.number
os.mkdir(self.debug_dir)
self.dump_regions('pre-coalesce')
self.coalesce_regions()
self.dump_regions('post-coalesce')
def dump_regions(self, fname):
fname = 'regions-'+fname+'.txt'
with open(os.path.join(self.debug_dir, fname), 'wb') as f:
f.write('Page #%d\n\n'%self.number)
for region in self.regions:
region.dump(f)
def coalesce_regions(self):
# Find contiguous sets of small regions and absorb each set into a
# neighboring region (prefer the neighbor whose column count is closer
# to the average column count of the set; if equal, use the larger
# region). Then merge contiguous regions that can contain each other.
absorbed = set([])
found = True
absorbed = set([])
processed = set([])
while found:
found = False
for i, region in enumerate(self.regions):
if region.is_small:
if region in absorbed:
continue
if region.is_small and region not in processed:
found = True
regions = []
processed.add(region)
regions = [region]
end = i+1
for j in range(i+1, len(self.regions)):
end = j
if self.regions[j].is_small:
regions.append(self.regions[j])
else:
break
prev = None if i == 0 else i-1
next = j if self.regions[j] not in regions else None
prev_region = None if i == 0 else i-1
next_region = end if end < len(self.regions) and self.regions[end] not in regions else None
absorb_at = 'bottom'
if prev_region is None and next_region is not None:
absorb_into = next_region
absorb_at = 'top'
elif next_region is None and prev_region is not None:
absorb_into = prev_region
elif prev_region is None and next_region is None:
if len(regions) > 1:
absorb_into = i
regions = regions[1:]
else:
absorb_into = None
else:
absorb_into = prev_region
if self.regions[next_region].line_count >= \
self.regions[prev_region].line_count:
avg_column_count = sum([len(r.columns) for r in
regions])/float(len(regions))
if self.regions[next_region].line_count > \
self.regions[prev_region].line_count \
or abs(avg_column_count -
len(self.regions[prev_region].columns)) \
> abs(avg_column_count -
len(self.regions[next_region].columns)):
absorb_into = next_region
absorb_at = 'top'
if absorb_into is not None:
self.regions[absorb_into].absorb_regions(regions, absorb_at)
absorbed.update(regions)
for region in absorbed:
self.regions.remove(region)
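
The neighbor choice for a run of small regions, condensed: absorb upward into the previous region by default, but go downward when the next region has more lines, or on a line-count tie when its column count is closer to the run's average. As a tiny function (a condensation of the logic above, not code from this diff):

    def choose_neighbor(prev_lines, prev_cols, next_lines, next_cols, avg_cols):
        # Mirrors the absorb_into selection in coalesce_regions() above.
        if next_lines > prev_lines:
            return 'next'
        if next_lines == prev_lines and \
                abs(avg_cols - prev_cols) > abs(avg_cols - next_cols):
            return 'next'
        return 'prev'

    print(choose_neighbor(3, 2, 3, 1, 1.0))  # next (tie on lines, closer cols)
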
def sort_into_columns(self, elem, neighbors):
neighbors.add(elem)
@ -575,8 +687,9 @@ class PDFDocument(object):
for elem in self.elements:
html.extend(elem.to_html())
html += ['</body>', '</html>']
raw = (u'\n'.join(html)).replace('</strong><strong>', '')
with open('index.html', 'wb') as f:
f.write((u'\n'.join(html)).encode('utf-8'))
f.write(raw.encode('utf-8'))

View File

@ -182,10 +182,10 @@ class PML_HTMLizer(object):
return pml
def strip_pml(self, pml):
pml = re.sub(r'\\C\d=".+*"', '', pml)
pml = re.sub(r'\\Fn=".+*"', '', pml)
pml = re.sub(r'\\Sd=".+*"', '', pml)
pml = re.sub(r'\\.=".+*"', '', pml)
pml = re.sub(r'\\C\d=".*"', '', pml)
pml = re.sub(r'\\Fn=".*"', '', pml)
pml = re.sub(r'\\Sd=".*"', '', pml)
pml = re.sub(r'\\.=".*"', '', pml)
pml = re.sub(r'\\X\d', '', pml)
pml = re.sub(r'\\S[pbd]', '', pml)
pml = re.sub(r'\\Fn', '', pml)
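
The quantifier sequence .+* is a "multiple repeat" error in Python's re module, so the old patterns raised before any stripping happened; .* is the intended "anything inside the quotes". Demonstration:

    import re

    try:
        re.compile(r'\\C\d=".+*"')
    except re.error as e:
        print('old pattern: %s' % e)  # multiple repeat

    print(re.sub(r'\\C\d=".*"', '', r'text \C2="Title" more'))  # 'text  more'
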

View File

@ -27,7 +27,7 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \
paragraph_def, convert_to_tags, output, copy, \
list_numbers, info, pict, table_info, fonts, paragraphs, \
body_styles, preamble_rest, group_styles, \
inline, correct_unicode
inline
from calibre.ebooks.rtf2xml.old_rtf import OldRtf
"""
@ -256,15 +256,6 @@ class ParseRtf:
)
pict_obj.process_pict()
self.__bracket_match('pict_data_info')
correct_uni_obj = correct_unicode.CorrectUnicode(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,
copy = self.__copy,
run_level = self.__run_level,
exception_handler = InvalidRtfException,
)
correct_uni_obj.correct_unicode()
self.__bracket_match('correct_unicode_info')
combine_obj = combine_borders.CombineBorders(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,

View File

@ -1,94 +0,0 @@
#########################################################################
# #
# #
# copyright 2002 Paul Henry Tremblay #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
# 02111-1307 USA #
# #
# #
#########################################################################
import os, re, tempfile
from calibre.ebooks.rtf2xml import copy
class CorrectUnicode:
"""
corrects sequences such as \u201c\'F0\'BE
Where \'F0\'BE has to be eliminated.
"""
def __init__(self,
in_file,
exception_handler,
bug_handler,
copy = None,
run_level = 1,
):
self.__file = in_file
self.__bug_handler = bug_handler
self.__copy = copy
self.__run_level = run_level
self.__write_to = tempfile.mktemp()
self.__exception_handler = exception_handler
self.__bug_handler = bug_handler
self.__state = 'outside'
self.__utf_exp = re.compile(r'&#x(.*?);')
def __process_token(self, line):
if self.__state == 'outside':
if line[:5] == 'tx<ut':
self.__handle_unicode(line)
else:
self.__write_obj.write(line)
elif self.__state == 'after':
if line[:5] == 'tx<hx':
pass
elif line[:5] == 'tx<ut':
self.__handle_unicode(line)
else:
self.__state = 'outside'
self.__write_obj.write(line)
else:
raise 'should\'t happen'
def __handle_unicode(self, line):
token = line[16:]
match_obj = re.search(self.__utf_exp, token)
if match_obj:
uni_char = match_obj.group(1)
dec_num = int(uni_char, 16)
if dec_num > 57343 and dec_num < 63743:
self.__state = 'outside'
else:
self.__write_obj.write(line)
self.__state = 'after'
else:
self.__write_obj.write(line)
self.__state = 'outside'
def correct_unicode(self):
"""
Requires:
nothing
Returns:
nothing (changes the original file)
Logic:
Read one line in at a time.
"""
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
self.__process_token(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "correct_unicode.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)

Some files were not shown because too many files have changed in this diff.