updated from main branch

2025-07-09 03:04:10 -04:00 · 2010-02-21 10:02:53 -08:00 · 2010-02-21 10:02:53 -08:00 · c91f022385
commit c91f022385
parent f488c66740 108c53e194
213 changed files with 36495 additions and 27669 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -4,6 +4,250 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.
 - version: 0.6.40
  date: 2010-02-12
  new features:
    - title: "Ability to perform exact match and regular expression based searches."
      type: major
      tickets: [4830]
      description: >
        You can now perform exact match searches by prefixing your search term with an =.
        So for example, tag:=fiction will match all tags named fiction, but not tags named
        non-fiction. Similarly, you can use regular expression based searches by prefixing
        the search term with ~.
    - title: "Autodetect if a zip/rar file is actually a comic and if so, import it as CBZ/CBR"
      tickets: [4753]
    - title: "Add plugin to automatically extract an ebook during import if it is in a zip/rar archive"
    - title: "Linux source install: Install a calibre environment module to ease the integration of calibre into other python projects"
  bug fixes:
    - title: "Fix regression in 0.6.39 that broke the LRF viewer"
    - title: "ZIP/EPUB files: Try to detect file name encoding instead of assuming the name is encoded in UTF-8. Also correctly
              encode the extracted file name in the local filesystem encoding."
    - title: "HTML Input: Handle HTML fragments more gracefully"
      tickets: [4854]
    - title: "Zip files: Workaround invalid zip files that contain end-of-file comments but set comment size to zero"
    - title: "Restore the recipe for the Wired daily feed."
      tickets: [4871]
    - title: "MOBI metadata: Preserve original EXTH records when not overwritten by calibre metadata."
    - title: "Catalog generation: Improved series sorting. All books not in a series are now grouped together"
    - title: "Fix occasional threading related crash when using the ChooseFormatDialog"
    - title: "Catalog generation: Various fixes for handling invalid data"
  new recipes:
    - title: Sueddeutsche Zeitung
      author: Darko Miletic
  improved recipes:
    - Pagina 12
    - Variety
    - Toronto Sun
    - Telegraph UK
    - Danas
    - Dilbert
 - version: 0.6.39
  date: 2010-02-09
  new features:
    - title: "Add ability to control how author sort strings are automatically generated from author strings, via the config file tweaks.py"
    - title: "Handle broken EPUB files from Project Gutenberg that have invalid OCF containers"
      tickets: [4832]
  bug fixes:
    - title: "Fix regression in 0.6.38 that broke setting bookmarks in the viewer"
    - title: "HTML Input: Ignore filenames that are encoded incorrectly."
  new recipes:
    - title: Radikal
      author: Darko Miletic
 - version: 0.6.38
  date: 2010-02-09
  new features:
    - title: "Driver for the Irex DR 800"
    - title: "Driver for the Booq e-book reader"
    - title: "Allow automatic series increment algorithm to be tweaked by editing the config file tweaks.py"
    - title: "Various improvements to the catalog generation. Larger thumbnails in EPUB output and better series sorting. Better handling of html markup in the comments."
    - title: "MOBI Output: Make font used for generated masthead images user customizable."
  bug fixes:
    - title: "E-book viewer: Make bookmarking (and remembering the last open position) more robust. For Linux source installs, you must have Qt 4.6"
      tickets: [4812]
    - title: "Fix conversion/import of HTML files with very long href links on windows"
      tickets: [4783]
    - title: "Don't read metadata from filenames for download news, even if the user has the read metadata from filename option set"
      tickets: [4758]
    - title: "Don't allow leading or trailing space in tags and series. Also normalize all internal spaces to a single space"
      tickets: [4809]
    - title: "E-book viewer: Toolbars remember their position"
      tickets: [4811]
    - title: "Fix year being repeated when editing date in main library screen on windows"
      tickets: [4829]
    - title: "News download: Fix downloading of images from URLs with an ampersand in them"
    - title: "Linux source install: unbundle cssutils, it is now an external dependency"
    - title: "MOBI metadata: Fix regression that broke setting of titles in some MOBI files"
    - title: "EPUB metadata: Extract the cover image from the html it is embedded in if possible, instead of rendering the html. Removes the white margins on covers and speeds up cover extraction"
    - title: "Fix regression in PDB output"
    - title: "News download: Remove <base> tags automatically"
    - title: "Searching on device: Ignore unicode errors"
  new recipes:
    - title: Courier Press
      author: Krittika Goyal
    - title: zive.sk and iliterature.cz
      author: Abelturd
    - title: El Comercio, Digital Spy UK, Gizmodo, News Straits Times, Read It Later, TidBits
      author: Darko Miletic
  improved recipes:
    - Jerusalem Post
    - Clarin
    - La Nacion
    - Harvard Business Review
    - People US Mashup
    - The New Republic
    - "Pagina 12"
    - Discover Magazine
    - Metro Montreal
 - version: 0.6.37
  date: 2010-02-01
  new features:
    - title: "E-book viewer: Add support for viewing SVG images"
      type: major
    - title: "Add category of Recently added books when generating catalog in e-book format"
    - title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
    - title: "Add support for masthead images when downloading news for the Kindle"
    - title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
  bug fixes:
    - title: Changing the date in Dutch
      tickets: [4732]
    - title: "Fix regression that broke sending files to unupdated PRS 500s"
    - title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
      tickets: [4726]
    - title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
      tickets: [4692]
    - title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
      tickets: [4683]
    - title: "Allow rating to be cleared via the Bulk metadata edit dialog"
      tickets: [4693]
    - title: "Add workaround for broken linux systems with multiply encoded file names"
      tickets: [4721]
    - title: Fix bug preventing the use of indices when setting save to disk templates
      tickets: [4710]
    - title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
      tickets: [4707]
    - title: "Catalog generation: Make sorting of numbers in title as text optional"
    - title: "Fix error while sending book with non-ascii character in title/author to device on linux"
      tickets: [4690]
    - title: "Fix reset cover in edit meta information dialog does not actually remove cover"
      tickets: [4731]
  new recipes:
    - title: Kamera Bild
      author: Darko Miletic
    - title: The Online Photographer
      author: Darko Miletic
    - title: The Luminous Landscape
      author: Darko Miletic
    - title: Slovo
      author: Abelturd
    - title: Various Danish newspapers
      author: Darko Miletic
    - title: Heraldo de Aragon
      author: Lorenzo Vigentini
    - title: Orange County Register
      author: Lorenzo Vigentini
    - title: Open Left
      author: Xanthan Gum
    - title: Michelle Malkin
      author: Walt Anthony
    - title: The Metro Montreal
      author: Jerry Clapperton
    - title: The Gazette
      author: Jerry Clapperton
    - title: Macleans Magazine
      author:  Nick Redding
    - title: NY Times Sunday Book Review
      author: Krittika Goyal
    - title: Various Italian newspapers
      author: Lorenzo Vigentini
  improved recipes:
    - The Irish Times
    - Washington Post
    - NIN
    - The Discover Magazine
    - Pagina 12
 - version: 0.6.36
  date: 2010-01-25
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@ -27,7 +27,7 @@ p.tags {
 p.description {
 	text-align:left;
-	font-style:italic;
+	font-style:normal;
 	margin-top: 0em;
 	}
@ -55,6 +55,14 @@ p.author_index {
 	text-indent: 0em;
 	}
 p.series {
 	text-align: left;
 	margin-top:0px;
 	margin-bottom:0px;
 	margin-left:2em;
 	text-indent:-2em;
 	}
 p.read_book {
 	text-align:left;
 	margin-top:0px;
@ -71,3 +79,9 @@ p.unread_book {
 	text-indent:-2em;
 	}
 hr.series_divider {
 	width:50%;
 	margin-left:1em;
 	margin-top:0em;
 	margin-bottom:0em;
 	}
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -0,0 +1,27 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 '''
 Contains various tweaks that affect calibre behavior. Only edit this file if
 you know what you are doing. If you delete this file, it will be recreated from
 defaults.
 '''
 # The algorithm used to assign a series number to a new book that is added
 # to an existing series.
 # Possible values are:
 #  next : use the next available number
 #  const: always assign the number 1
 series_index_auto_increment = 'next'
 # The algorithm used to copy author to author_sort.
 # Possible values are:
 #  invert: use "fn ln" -> "ln, fn" (the original algorithm)
 #  copy  : copy author to author_sort without modification
 #  comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
 author_sort_copy_method = 'invert'
--- a/resources/images/catalog.svg
+++ b/resources/images/catalog.svg
@ -0,0 +1,157 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Generator: Adobe Illustrator 14.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 43363)  -->
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <svg version="1.1" id="svg2" xmlns:svg="http://www.w3.org/2000/svg" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
 	 xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" width="128px" height="128px"
 	 viewBox="0 0 128 128" enable-background="new 0 0 128 128" xml:space="preserve">
 <filter  id="filter5365">
 	<feGaussianBlur  stdDeviation="1.3829225" inkscape:collect="always" id="feGaussianBlur5367"></feGaussianBlur>
 </filter>
 <g id="layer1">
 </g>
 <g id="layer2">
 	<polygon id="rect3200" opacity="0.5722" fill="#0000A4" enable-background="new    " points="167.5,297.005 171.429,297.005 
 		171.429,297.005 	"/>
 	<g id="path5265" filter="url(#filter5365)">
 		<polygon fill="#362D2D" points="21.951,79.904 70.397,63.09 119.953,80.636 70.397,97.084 		"/>
 		<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="21.951,79.904 70.397,63.09 
 			119.953,80.636 70.397,97.084 		"/>
 	</g>
 	<g id="path5267" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M118.639,100.902v1.724l-46.437,15.432c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068
 			l2.322,16.553L118.639,100.902z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M118.639,100.902v1.724l-46.437,15.432
 			c-3.723-9.284-1.901-16.34,0.089-20.69l46.883-15.518l-6.34,2.068l2.322,16.553L118.639,100.902z"/>
 	</g>
 	<g id="path5269" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986c0,0-1.515-3.455-1.942-9.812
 			C68.936,101.726,70.711,98.81,70.711,98.81z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M70.711,98.81l47.581-15.743l0.29,18.582l-47.56,15.986
 			c0,0-1.515-3.455-1.942-9.812C68.936,101.726,70.711,98.81,70.711,98.81z"/>
 	</g>
 	<g id="path5271" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019l-50.008-16.208
 			C17.974,94.288,17.113,87.874,21.479,79.607z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M21.479,79.607l49.115,17.501c-3.287,7.816-2.385,15.202,0.982,23.019
 			l-50.008-16.208C17.974,94.288,17.113,87.874,21.479,79.607z"/>
 	</g>
 	<g id="path5273" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M120.871,99.092v4.827l-50.008,16.897l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346
 			l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139L120.871,99.092z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M120.871,99.092v4.827l-50.008,16.897
 			l-49.651-15.863c-4.763-11.162-1.987-18.682,0.714-25.346l49.651-16.724l48.579,17.242v3.449l-2.143,1.033l0.357,14.139
 			L120.871,99.092z"/>
 	</g>
 	<path id="path5385" fill="#78CE4F" d="M19.316,78.05l48.438-17.414l49.548,18.171L67.754,95.842L19.316,78.05z"/>
 	<path id="path5387" fill="none" stroke="#0F973B" stroke-width="1.9" d="M115.988,99.796v1.786l-46.43,15.982
 		c-3.722-9.616-1.901-16.924,0.09-21.43l46.875-16.07l-6.34,2.143l2.322,17.143L115.988,99.796z"/>
 		<radialGradient id="path5389_1_" cx="498.3457" cy="267.1621" r="27.1927" gradientTransform="matrix(-0.064 0.175 1.8694 0.6835 -425.1342 -169.6643)" gradientUnits="userSpaceOnUse">
 		<stop  offset="0" style="stop-color:#B5FFA6"/>
 		<stop  offset="1" style="stop-color:#76E976"/>
 	</radialGradient>
 	<path id="path5389" fill="url(#path5389_1_)" stroke="#003131" stroke-width="1.6" stroke-opacity="0.9608" d="M18.845,77.742
 		l49.107,18.125c-3.287,8.096-2.385,15.744,0.981,23.84l-50-16.786C15.339,92.946,14.479,86.304,18.845,77.742z"/>
 	<path id="path5391" fill="none" stroke="#003131" stroke-width="2.7" stroke-linejoin="bevel" stroke-opacity="0.9608" d="
 		M118.22,97.921v5l-50,17.5l-49.643-16.429c-4.762-11.561-1.987-19.348,0.714-26.25l49.642-17.321l48.572,17.857v3.571l-2.143,1.071
 		l0.356,14.644L118.22,97.921z"/>
 	<path id="path5393" fill="#FFFFFF" d="M68.068,97.629l47.572-16.305l0.29,19.245l-47.194,16.423c0,0-1.424-2.819-2.12-10.029
 		C66.471,100.649,68.068,97.629,68.068,97.629z"/>
 	<g id="path5419" filter="url(#filter5365)">
 		<polygon fill="#362D2D" points="8.737,52.047 57.183,35.233 106.738,52.778 57.183,69.227 		"/>
 		<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="8.737,52.047 57.183,35.233 
 			106.738,52.778 57.183,69.227 		"/>
 	</g>
 	<g id="path5421" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M105.424,73.045v1.724L58.988,90.2c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069
 			l2.322,16.552L105.424,73.045z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M105.424,73.045v1.724L58.988,90.2
 			c-3.723-9.284-1.902-16.34,0.089-20.69l46.882-15.518l-6.341,2.069l2.322,16.552L105.424,73.045z"/>
 	</g>
 	<g id="path5423" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777c0,0-1.515-3.455-1.942-9.812
 			C55.721,73.869,57.497,70.953,57.497,70.953z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M57.497,70.953l47.581-15.744l0.289,18.582L57.809,89.777
 			c0,0-1.515-3.455-1.942-9.812C55.721,73.869,57.497,70.953,57.497,70.953z"/>
 	</g>
 	<g id="path5425" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018L8.354,76.062
 			C4.759,66.431,3.899,60.017,8.265,51.751z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M8.265,51.751l49.116,17.501c-3.288,7.816-2.385,15.201,0.982,23.018
 			L8.354,76.062C4.759,66.431,3.899,60.017,8.265,51.751z"/>
 	</g>
 	<g id="path5427" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M107.656,71.234v4.828L57.648,92.959L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725
 			l48.58,17.242v3.448l-2.144,1.035l0.357,14.139L107.656,71.234z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M107.656,71.234v4.828L57.648,92.959
 			L7.998,77.097C3.234,65.934,6.011,58.415,8.712,51.751l49.651-16.725l48.58,17.242v3.448l-2.144,1.035l0.357,14.139
 			L107.656,71.234z"/>
 	</g>
 	<path id="path5431" fill="#60BAFF" stroke="#003244" stroke-width="1.2507" stroke-linejoin="bevel" d="M6.102,50.193L54.54,32.779
 		l49.548,18.171L54.54,67.985L6.102,50.193z"/>
 	<path id="path5433" fill="none" stroke="#0056D5" stroke-width="2.8104" d="M102.768,71.76v1.803L56.35,89.701
 		c-3.721-9.71-1.901-17.089,0.089-21.639l46.865-16.229l-6.338,2.164l2.321,17.312L102.768,71.76z"/>
 		<radialGradient id="path5435_1_" cx="316.8916" cy="261.2949" r="27.1937" gradientTransform="matrix(-0.0902 0.2793 1.9257 0.6218 -445.576 -180.1955)" gradientUnits="userSpaceOnUse">
 		<stop  offset="0" style="stop-color:#789DED"/>
 		<stop  offset="1" style="stop-color:#2381E8"/>
 	</radialGradient>
 	<path id="path5435" fill="url(#path5435_1_)" stroke="#003244" stroke-width="1.6" d="M5.63,49.885L54.738,68.01
 		c-3.287,8.096-2.385,15.744,0.982,23.84l-50-16.785C2.125,65.09,1.265,58.447,5.63,49.885z"/>
 	<path id="path5437" fill="none" stroke="#003244" stroke-width="2.7" stroke-linejoin="bevel" d="M105.006,70.064v5l-50,17.5
 		L5.363,76.135c-4.762-11.561-1.987-19.348,0.714-26.25L55.72,32.564l48.571,17.857v3.572l-2.143,1.071l0.357,14.643L105.006,70.064
 		z"/>
 	<path id="path5439" fill="#FFFFFF" d="M54.854,69.772l47.573-16.306l0.29,19.245L55.522,89.135c0,0-1.425-2.819-2.121-10.028
 		C53.256,72.793,54.854,69.772,54.854,69.772z"/>
 	<g id="path5447" filter="url(#filter5365)">
 		<polygon fill="#362D2D" points="25.88,28.119 74.326,11.305 123.882,28.85 74.326,45.299 		"/>
 		<polygon fill="none" stroke="#362D2D" stroke-width="1.2507" stroke-linejoin="bevel" points="25.88,28.119 74.326,11.305 
 			123.882,28.85 74.326,45.299 		"/>
 	</g>
 	<g id="path5449" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M122.567,49.116v1.724L76.131,66.271c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069
 			l2.321,16.552L122.567,49.116z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="1.9" d="M122.567,49.116v1.724L76.131,66.271
 			c-3.723-9.284-1.902-16.34,0.09-20.69l46.883-15.518l-6.341,2.069l2.321,16.552L122.567,49.116z"/>
 	</g>
 	<g id="path5451" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849c0,0-1.514-3.455-1.941-9.812
 			C72.863,49.94,74.641,47.024,74.641,47.024z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="2.1" d="M74.641,47.024l47.58-15.744l0.289,18.582L74.951,65.849
 			c0,0-1.514-3.455-1.941-9.812C72.863,49.94,74.641,47.024,74.641,47.024z"/>
 	</g>
 	<g id="path5453" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018L25.498,52.133
 			C21.902,42.502,21.042,36.088,25.408,27.822z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="1.6" d="M25.408,27.822l49.115,17.5c-3.287,7.816-2.385,15.202,0.982,23.018
 			L25.498,52.133C21.902,42.502,21.042,36.088,25.408,27.822z"/>
 	</g>
 	<g id="path5455" filter="url(#filter5365)">
 		<path fill="#362D2D" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725
 			l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
 		<path fill="none" stroke="#362D2D" stroke-width="2.7" stroke-linejoin="bevel" d="M124.8,47.306v4.828L74.791,69.03L25.14,53.168
 			c-4.763-11.163-1.987-18.682,0.714-25.346l49.651-16.725l48.58,17.242v3.449l-2.145,1.034l0.357,14.139L124.8,47.306z"/>
 	</g>
 	<path id="path5459" fill="#FF7272" d="M23.245,26.264L71.684,8.85l49.547,18.171L71.684,44.057L23.245,26.264z"/>
 	<path id="path5461" fill="none" stroke="#CF0505" stroke-width="1.9" d="M119.916,48.01v1.786L73.488,65.778
 		c-3.723-9.616-1.902-16.923,0.089-21.429l46.875-16.071l-6.339,2.143l2.32,17.143L119.916,48.01z"/>
 		<radialGradient id="path5463_1_" cx="14.938" cy="-466.4766" r="27.3207" gradientTransform="matrix(2.5834 0.998 0.0835 -0.2162 46.7076 -68.8071)" gradientUnits="userSpaceOnUse">
 		<stop  offset="0" style="stop-color:#FD8A8A"/>
 		<stop  offset="1" style="stop-color:#FF7878"/>
 	</radialGradient>
 	<path id="path5463" fill="url(#path5463_1_)" stroke="#600101" stroke-width="1.6" d="M22.773,25.957l49.107,18.125
 		c-3.287,8.095-2.385,15.744,0.982,23.839l-50-18.806C19.268,39.14,18.408,34.518,22.773,25.957z"/>
 		<linearGradient id="path3311_1_" gradientUnits="userSpaceOnUse" x1="-1.3145" y1="103.2168" x2="67.4683" y2="103.2168" gradientTransform="matrix(1 0 0 -1 5.4287 129.1426)">
 		<stop  offset="0" style="stop-color:#FFFFFF"/>
 		<stop  offset="1" style="stop-color:#FFFFFF;stop-opacity:0.2471"/>
 	</linearGradient>
 	<path id="path3311" fill="url(#path3311_1_)" d="M23.904,25.736L72.342,8.322l49.548,18.171L72.342,43.529L23.904,25.736z"/>
 	<path id="path5465" fill="none" stroke="#600101" stroke-width="2.7" stroke-linejoin="bevel" d="M122.148,46.135v5l-50,17.5
 		l-49.39-18.701c-4.762-11.562-2.239-17.076,0.461-23.977L72.863,8.635l48.57,17.857v3.571l-2.143,1.071l0.357,14.643
 		L122.148,46.135z"/>
 	<path id="path5467" fill="#FFFFFF" d="M71.997,45.844l47.573-16.306l0.289,19.246L72.666,65.206c0,0-1.426-2.819-2.121-10.028
 		C70.399,48.864,71.997,45.844,71.997,45.844z"/>
 </g>
 </svg>
--- a/resources/images/news/digitalspy_uk.png
+++ b/resources/images/news/digitalspy_uk.png
--- a/resources/images/news/elcomercio.png
+++ b/resources/images/news/elcomercio.png
--- a/resources/images/news/gizmodo.png
+++ b/resources/images/news/gizmodo.png
--- a/resources/images/news/kamerabild.png
+++ b/resources/images/news/kamerabild.png
--- a/resources/images/news/newsstraitstimes.png
+++ b/resources/images/news/newsstraitstimes.png
--- a/resources/images/news/radikal_tr.png
+++ b/resources/images/news/radikal_tr.png
--- a/resources/images/news/readitlater.png
+++ b/resources/images/news/readitlater.png
--- a/resources/images/news/sueddeutschezeitung.png
+++ b/resources/images/news/sueddeutschezeitung.png
--- a/resources/images/news/theluminouslandscape.png
+++ b/resources/images/news/theluminouslandscape.png
--- a/resources/images/news/tidbits.png
+++ b/resources/images/news/tidbits.png
--- a/resources/kathemerini.recipe
+++ b/resources/kathemerini.recipe
@ -0,0 +1,37 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class Kathimerini(BasicNewsRecipe):
    # News recipe for Kathimerini. Each feed entry pairs a Greek title
    # (written with unicode escapes) with the URL of that feed.
    title                  = 'Kathimerini'
    __author__             = 'Pan'
    description            = 'News from Greece'
    max_articles_per_feed  = 100
    oldest_article         = 100
    publisher              = 'Kathimerini'
    category               = 'news, GR'
    language               = 'el'
    no_stylesheets         = True
    # Both boundaries are anchored on the same <td class="news"> element.
    remove_tags_before     = dict(name='td',attrs={'class':'news'})
    remove_tags_after      = dict(name='td',attrs={'class':'news'})
    remove_attributes      = ['width', 'src','header','footer']
    # Feed URLs are written without surrounding whitespace so that every entry
    # is a well-formed URL string.
    feeds = [
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae',
         'http://wk.kathimerini.gr/xml_files/politics.xml'),
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
         'http://wk.kathimerini.gr/xml_files/ell.xml'),
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
         'http://wk.kathimerini.gr/xml_files/world.xml'),
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         'http://wk.kathimerini.gr/xml_files/economy_1.xml'),
        (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2',
         'http://wk.kathimerini.gr/xml_files/economy_2.xml'),
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
         'http://wk.kathimerini.gr/xml_files/economy_3.xml'),
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
         'http://wk.kathimerini.gr/xml_files/civ.xml'),
        (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2',
         'http://wk.kathimerini.gr/xml_files/st.xml'),
    ]
    def print_version(self, url):
        # Doubles the '4dcgi/' path segment of news.kathimerini.gr article
        # URLs; a URL that lacks that prefix is returned unchanged.
        return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/')
--- a/resources/recipes/ZIVE.sk.recipe
+++ b/resources/recipes/ZIVE.sk.recipe
@ -0,0 +1,45 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class ZiveRecipe(BasicNewsRecipe):
    # News recipe for ZIVE.sk, built from a single RSS feed.
    __license__  = 'GPL v3'
    __author__ = 'Abelturd'
    language = 'sk'
    version = 1
    title = u'ZIVE.sk'
    publisher = u''
    category = u'News, Newspaper'
    description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
    encoding = 'UTF-8'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
    feeds = [
        (u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'),
    ]
    # Strip the "Pokra...ie" paragraph. The match is non-greedy: with
    # re.DOTALL a greedy '.*' would run on to the LAST 'ie</strong></p>' in the
    # page and delete all the article text in between.
    preprocess_regexps = [
        (re.compile(r'<p><p><strong>Pokra.*?ie</strong></p>', re.DOTALL|re.IGNORECASE),
        lambda match: ''),
     ]
    remove_tags = []
    # Keep only the headline, the author line and the article text container.
    keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),]
    extra_css = '''
                h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
                h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
                '''
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@ -1,6 +1,6 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 clarin.com
 '''
@ -21,7 +21,8 @@ class Clarin(BasicNewsRecipe):
    cover_url             = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
    encoding              = 'cp1252'
    language              = 'es'
-    extra_css             = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
+    masthead_url          = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} '
    conversion_options = {
                          'comment'  : description
--- a/resources/recipes/courrier.recipe
+++ b/resources/recipes/courrier.recipe
@ -0,0 +1,26 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class CourierPress(BasicNewsRecipe):
    # Recipe for the Courier Press news headlines feed.
    title          = u'Courier Press'
    language       = 'en'
    __author__     = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    # Stylesheet flag spelled the way the other recipes spell it.
    no_stylesheets = True
    remove_tags = [
       dict(name='iframe'),
    ]
    feeds          = [
 ('Courier Press',
 'http://www.courierpress.com/rss/headlines/news/'),
 ]
    def preprocess_html(self, soup):
        # Reduce the page to the article body wrapped in a minimal HTML
        # skeleton; everything outside div#article_body is discarded.
        story = soup.find(name='div', attrs={'id':'article_body'})
        if story is None:
            # No article container on this page: return the markup untouched
            # rather than failing while inserting None into the new document.
            return soup
        skeleton = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = skeleton.find(name='body')
        body.insert(0, story)
        return skeleton
--- a/resources/recipes/danas.recipe
+++ b/resources/recipes/danas.recipe
@ -1,64 +1,63 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 danas.rs
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
 class Danas(BasicNewsRecipe):
    title                 = 'Danas'
    __author__            = 'Darko Miletic'
-    description           = 'Vesti'
+    description           = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
    publisher             = 'Danas d.o.o.'
    category              = 'news, politics, Serbia'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
    encoding              = 'utf-8'
    masthead_url          = 'http://www.danas.rs/images/basic/danas.gif'
    language              = 'sr'
-    lang                  = 'sr-Latn-RS'
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
    direction             = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'pretty_print'     : True
                        }
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    keep_only_tags     = [dict(name='div', attrs={'id':'left'})]
    remove_tags = [
                     dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
                    ,dict(name='div', attrs={'id':'comments'})
-                    ,dict(name=['object','link'])
+                    ,dict(name=['object','link','iframe'])
                  ]
-    feeds          = [ 
+    feeds          = [
-                        (u'Vesti'   , u'http://www.danas.rs/rss/rss.asp'            )
+                        (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
-                       ,(u'Periskop', u'http://www.danas.rs/rss/rss.asp?column_id=4')
+                       ,(u'Hronika'  , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
                       ,(u'Drustvo'  , u'http://www.danas.rs/rss/rss.asp?column_id=24')
                       ,(u'Dijalog'  , u'http://www.danas.rs/rss/rss.asp?column_id=1' )
                       ,(u'Ekonomija', u'http://www.danas.rs/rss/rss.asp?column_id=6' )
                       ,(u'Svet'     , u'http://www.danas.rs/rss/rss.asp?column_id=25')
                       ,(u'Srbija'   , u'http://www.danas.rs/rss/rss.asp?column_id=28')
                       ,(u'Kultura'  , u'http://www.danas.rs/rss/rss.asp?column_id=5' )
                       ,(u'Sport'    , u'http://www.danas.rs/rss/rss.asp?column_id=13')
                       ,(u'Scena'    , u'http://www.danas.rs/rss/rss.asp?column_id=42')
                       ,(u'Feljton'  , u'http://www.danas.rs/rss/rss.asp?column_id=19')
                       ,(u'Periskop' , u'http://www.danas.rs/rss/rss.asp?column_id=4' )
                     ]
    def preprocess_html(self, soup):
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+        for item in soup.findAll(style=True):
-        soup.head.insert(0,mlang)
+            del item['style']
        attribs = [  'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                   del item[attrib]
        return soup
    def print_version(self, url):
        return url + '&action=print'
--- a/resources/recipes/digitalspy_uk.recipe
+++ b/resources/recipes/digitalspy_uk.recipe
@ -0,0 +1,43 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.digitalspy.co.uk
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class DigitalSpyUK(BasicNewsRecipe):
    # News recipe for the UK edition of Digital Spy (www.digitalspy.co.uk).
    #
    # --- Descriptive metadata ---
    title       = 'Digital Spy - UK Edition'
    __author__  = 'Darko Miletic'
    description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
    publisher   = 'Digital Spy Limited.'
    category    = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
    language    = 'en_GB'
    # --- Download settings ---
    oldest_article        = 2
    max_articles_per_feed = 100
    encoding              = 'cp1252'
    use_embedded_content  = False
    remove_empty_feeds    = True
    # --- Styling: site stylesheets are dropped and replaced by extra_css ---
    no_stylesheets = True
    extra_css      = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '
    # Metadata forwarded to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comment'  : description,
        'tags'     : category,
        'publisher': publisher,
        'language' : language,
    }
    # --- Markup clean-up: selectors for the article container, tags to drop,
    # and attributes to strip ---
    keep_only_tags    = [{'name': 'div', 'attrs': {'id': 'article'}}]
    remove_tags       = [{'name': ['link']}]
    remove_attributes = ['height', 'width']
    # (section title, RSS URL) pairs.
    feeds = [
        (u'News',          u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml'),
        (u'Big Brother',   u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml'),
        (u'Entertainment', u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml'),
        (u'General',       u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml'),
        (u'Media',         u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml'),
    ]
--- a/resources/recipes/dilbert.recipe
+++ b/resources/recipes/dilbert.recipe
@ -3,6 +3,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 http://www.dilbert.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
@ -28,6 +29,12 @@ class DosisDiarias(BasicNewsRecipe):
    feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip' )]
    preprocess_regexps = [
                    (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE),
                        lambda match: 'strip.zoom.gif')
                            ]
    def get_article_url(self, article):
        # Use the feed entry's 'feedburner_origlink' value as the article URL;
        # the result is None when the entry has no such key.
        return article.get('feedburner_origlink')
--- a/resources/recipes/discover_magazine.recipe
+++ b/resources/recipes/discover_magazine.recipe
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
-doscovermagazine.com
+discovermagazine.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
@ -12,42 +12,36 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class DiscoverMagazine(BasicNewsRecipe):
    title = u'Discover Magazine'
-    description = u'Science, Technology and the Future'
+    description = u'Science, Technology and the Future' 
-    __author__ = 'Mike Diaz'
+    __author__ = 'Mike Diaz' 
    language = 'en'
    oldest_article = 33
    max_articles_per_feed = 20
    no_stylesheets = True
-    remove_javascript     = True
+    remove_javascript = True
    use_embedded_content  = False
    encoding = 'utf-8'
    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
    remove_tags = [dict(name='div', attrs={'id':['searchModule', 'mainMenu', 'tool-box']}),
                   dict(name='img', attrs={'src':'http://discovermagazine.com/onebyone.gif'})]
-    remove_tags_before = dict(id='articlePage')
+    remove_tags_after = [dict(name='div', attrs={'class':'articlebody'})]
-
+ 
    keep_only_tags = [dict(name='div', attrs={'id':'articlePage'})]
    remove_tags = [dict(attrs={'id':['buttons', 'tool-box', 'teaser', 'already-subscriber', 'teaser-suite',  'related-articles', 'relatedItem', 'box-popular', 'box-blogs', 'box-news', 'footer']}),
                            dict(attrs={'class':'popularNewsBox'}),
                            dict(name=['img', 'style', 'head'])]
    remove_tags_after = dict(id='articlePage')
    feeds = [
-             (u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
+             (u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'), 
-             (u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
+             (u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'), 
-             (u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
+             (u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'), 
-             (u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
+             (u'Space', u'http://discovermagazine.com/topics/space/rss.xml'), 
-             (u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
+             (u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'), 
-             (u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
+             (u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'), 
-             (u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
+             (u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'), 
-             (u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
+             (u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'), 
-             (u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
+             (u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'), 
-             (u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
+             (u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'), 
-             (u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
+             (u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'), 
-             (u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
+             (u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'), 
-             (u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'),
+             (u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'), 
             (u'Science Not Fiction', u'http://blogs.discovermagazine.com/sciencenotfiction/wp-rss.php')
-            ]
+            ]
--- a/resources/recipes/eksiazki.recipe
+++ b/resources/recipes/eksiazki.recipe
@ -0,0 +1,26 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v2'
 __copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 eksiazki.org
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class eksiazki(BasicNewsRecipe):
    # News recipe for the eksiazki.org feed (Polish).
    title          = u'eksiazki.org'
    # The attribute must be spelled `description` (as in the other recipes);
    # a misspelled name is just an unused class attribute.
    description    = u'Twoje centrum wiedzy o epapierze i ebookach'
    language       = 'pl'
    __author__     = u'Tomasz D\u0142ugosz'
    no_stylesheets = True
    remove_javascript = True
    # (section title, RSS URL) pairs.
    feeds          = [(u'wpisy', u'http://www.eksiazki.org/feed/')]
    # Selector for the article container.
    keep_only_tags = [dict(name='div', attrs={'id':'content-body'})]
    # Selectors for elements dropped from the kept content.
    # Indentation uses spaces only (no tabs).
    remove_tags = [
        dict(name='span', attrs={'class':'nr_comm'}),
        dict(name='div', attrs={'id':'tabsContainer'}),
        dict(name='div', attrs={'class':'next_previous_links'})]
--- a/resources/recipes/elcomercio.recipe
+++ b/resources/recipes/elcomercio.recipe
@ -0,0 +1,38 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 elcomercio.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class ElComercio(BasicNewsRecipe):
    # News recipe for El Comercio (elcomercio.com), Ecuador.
    title                 = 'El Comercio '
    __author__            = 'Darko Miletic'
    # Describes this newspaper (the value is also forwarded as the e-book
    # comment through conversion_options below).
    description           = 'Noticias de Ecuador y el mundo'
    publisher             = 'GRUPO EL COMERCIO C.A.'
    category              = 'news, Ecuador, politics'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    # Article text is taken from the feed entries themselves.
    use_embedded_content  = True
    language              = 'es'
    masthead_url          = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
    extra_css             = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
    # Metadata forwarded to the conversion step; mirrors the attributes above.
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }
    remove_attributes = ['width','height']
    # (section title, RSS URL) pairs.
    feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]
    def preprocess_html(self, soup):
        # Delegate image clean-up to the base-class helper and return its result.
        return self.adeify_images(soup)
--- a/resources/recipes/gizmodo.recipe
+++ b/resources/recipes/gizmodo.recipe
@ -0,0 +1,40 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 gizmodo.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Gizmodo(BasicNewsRecipe):
    # News recipe for gizmodo.com, built from the full-content Gawker feed.
    #
    # --- Descriptive metadata ---
    title        = 'Gizmodo'
    __author__   = 'Darko Miletic'
    description  = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
    publisher    = 'gizmodo.com'
    category     = 'news, IT, Internet, gadgets'
    language     = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
    # --- Download settings ---
    oldest_article        = 2
    max_articles_per_feed = 100
    encoding              = 'utf-8'
    # Article text is taken from the feed entries themselves.
    use_embedded_content  = True
    # --- Styling ---
    no_stylesheets = True
    extra_css      = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
    # Metadata forwarded to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comment'  : description,
        'tags'     : category,
        'publisher': publisher,
        'language' : language,
    }
    # --- Markup clean-up: the 'feedflare' div is both removed and used as the
    # cut-off marker for remove_tags_after ---
    remove_attributes = ['width', 'height']
    remove_tags       = [{'name': 'div', 'attrs': {'class': 'feedflare'}}]
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'feedflare'}}
    # (section title, RSS URL) pairs.
    feeds = [
        (u'Articles', u'http://feeds.gawker.com/gizmodo/full'),
    ]
    def preprocess_html(self, soup):
        # Hand the parsed page to the base-class image helper and return its result.
        cleaned = self.adeify_images(soup)
        return cleaned
--- a/resources/recipes/hbr.recipe
+++ b/resources/recipes/hbr.recipe
@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe):
    remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
        'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
        'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
-        'mailingListTout', 'partnerCenter', 'pageFooter']),
+        'mailingListTout', 'partnerCenter', 'pageFooter',
        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
        dict(name='iframe')]
    extra_css = '''
                a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
--- a/resources/recipes/iliteratura_cz.recipe
+++ b/resources/recipes/iliteratura_cz.recipe
@ -0,0 +1,47 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class SmeRecipe(BasicNewsRecipe):
    # News recipe for iLiteratura.cz (Czech literary magazine).
    __license__  = 'GPL v3'
    __author__ = 'Abelturd'
    # ISO 639 language code for Czech is 'cs' ('cz' is the country code).
    language = 'cs'
    version = 1
    title = u'iLiteratura.cz'
    publisher = u''
    category = u'News, Newspaper'
    description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA'
    cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    # (section title, RSS URL) pairs.
    feeds = [(u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp')]
    keep_only_tags = []
    remove_tags = [dict(name='table'),dict(name='h3')]
    # Cut everything from the "Souvisej..." <h3> heading through </body>.
    preprocess_regexps = [
        (re.compile(r'<h3>Souvisej.*</body>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]
    def print_version(self, url):
        # Rebuild the print-view ("tisk") address from the numeric id that
        # follows 'ID=' in the article URL.
        m = re.search(r'(?<=ID=)[0-9]+', url)
        if m is None:
            # No article id in the URL: fall back to the original address
            # instead of failing with AttributeError on m.group().
            return url
        return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + m.group(0) + '&c=tisk'
    extra_css = '''
                  h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
                  h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
                '''
--- a/resources/recipes/ilsole24ore.recipe
+++ b/resources/recipes/ilsole24ore.recipe
@ -0,0 +1,67 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __author__    = 'Lorenzo Vigentini & Edwin van Maastrigt'
 __copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com> and Edwin van Maastrigt <evanmaastrigt at gmail.com>'
 __description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
 '''
 http://www.ilsole24ore.com/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class ilsole(BasicNewsRecipe):
    # News recipe for Il Sole 24 Ore (www.ilsole24ore.com), Italian financial daily.
    author        = 'Lorenzo Vigentini & Edwin van Maastrigt'
    # Recipe credit under the `__author__` name the other recipes use;
    # `author` above is kept for backward compatibility.
    __author__    = author
    description   = 'Financial news daily paper'
    cover_url      = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
    title          = u'il Sole 24 Ore '
    publisher      = 'italiaNews'
    category       = 'News, finance, economy, politics'
    language       = 'it'
    timefmt        = '[%a, %d %b, %Y]'
    oldest_article = 2
    max_articles_per_feed = 50
    use_embedded_content  = False
    remove_javascript     = True
    no_stylesheets        = True
    def get_article_url(self, article):
        # Prefer the entry's 'id', then its 'guid'; None when neither exists.
        return article.get('id', article.get('guid', None))
    def print_version(self, url):
        # Drop the query string (text after the last '?'), then switch to the
        # '_PRN.shtml' page variant.
        link, sep, params = url.rpartition('?')
        if not sep:
            # No '?' in the URL: rpartition() returns ('', '', url), so `link`
            # would be empty. Use the whole URL instead.
            link = url
        return link.replace('.shtml', '_PRN.shtml')
    # Selector for the article container.
    keep_only_tags     = [
                            dict(name='div', attrs={'class':'txt'})
                        ]
    remove_tags = [dict(name='br')]
    # (section title, RSS URL) pairs.
    feeds          = [
                       (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
                       (u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
                       (u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
                       (u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
                       (u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
                       (u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
                       (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
                       (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
                       (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
                       (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
                     ]
    extra_css = '''
                html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
                .linkHighlight {color:#0292c6;}
                .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
                .txt p {line-height:18px;}
                .txt span {line-height:22px;}
                .title h3 {color:#7b7b7b;}
                .title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
                '''
--- a/resources/recipes/jpost.recipe
+++ b/resources/recipes/jpost.recipe
@ -10,22 +10,19 @@ class JerusalemPost(BasicNewsRecipe):
    __author__ = 'Kovid Goyal'
    max_articles_per_feed = 10
    no_stylesheets = True
-    remove_tags_before = {'class':'byline'}
+    remove_tags_before = {'class':'jp-grid-content'}
-    remove_tags    = [
+    remove_tags_after = {'id':'body_val'}
-                      {'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox',
+
                                'slideshow']},
                       dict(id=['artFontButtons', 'artRelatedBlock']),
                     ]
    remove_tags_after = {'id':'artTxtBlock'}
    feeds =  [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
               ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
               ('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'),
               ('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'),
               ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
          ]
-          
+
-    def postprocess_html(self, soup, first):
+    def preprocess_html(self, soup):
-        for tag in soup.findAll(name=['table', 'tr', 'td']):
+        for x in soup.findAll(name=['form', 'input']):
-            tag.name = 'div'
+            x.name = 'div'
-        return soup
+        for x in soup.findAll('body', style=True):
            del x['style']
        return soup
--- a/resources/recipes/kamerabild.recipe
+++ b/resources/recipes/kamerabild.recipe
@ -0,0 +1,46 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.kamerabild.se
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Kamerabild(BasicNewsRecipe):
    # News recipe for Kamera & Bild (www.kamerabild.se).
    #
    # --- Descriptive metadata ---
    title       = 'Kamera & Bild'
    __author__  = 'Darko Miletic'
    description = 'Photo News from Sweden'
    # NOTE(review): 'politiken.dk' looks carried over from another recipe --
    # confirm the actual publisher of kamerabild.se.
    publisher   = 'politiken.dk'
    category    = 'news, photograph, Sweden'
    language    = 'sv'
    # --- Download settings ---
    oldest_article        = 7
    max_articles_per_feed = 100
    remove_empty_feeds    = True
    use_embedded_content  = False
    encoding              = 'utf8'
    # --- Styling ---
    no_stylesheets = True
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } .title{font-weight: bold} .pricerunnerAdContainer{border-bottom: 1px solid; border-top: 1px solid; margin-top: 0.5em; margin-bottom: 0.5em} .elementTeaserKicker{font-weight: bold; color: #AE0A10} '
    # Metadata forwarded to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comment'  : description,
        'tags'     : category,
        'publisher': publisher,
        'language' : language,
    }
    # (section title, RSS URL) pairs.
    feeds = [
        (u'Articles', u'http://www.kamerabild.se/cmlink/Nyheter-fran-KAMERA-BILD-1.43315.xml'),
    ]
    # --- Markup clean-up selectors ---
    keep_only_tags    = [{'name': 'div', 'attrs': {'class': 'container'}}]
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'editor'}}
    remove_tags       = [
        {'name': ['object', 'link', 'iframe']},
        {'name': 'div', 'attrs': {'class': ['pricerunner_head', 'sideBar', 'img']}},
    ]
    def preprocess_html(self, soup):
        # Strip every inline style attribute, then pass the page through the
        # base-class image helper.
        for styled in soup.findAll(style=True):
            del styled['style']
        cleaned = self.adeify_images(soup)
        return cleaned
--- a/resources/recipes/lanacion.recipe
+++ b/resources/recipes/lanacion.recipe
@ -1,7 +1,5 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lanacion.com.ar
 '''
@ -12,28 +10,34 @@ class Lanacion(BasicNewsRecipe):
    title                 = 'La Nacion'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina y el resto del mundo'
-    publisher             = 'La Nacion'
+    publisher             = 'La Nacion S.A.'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    remove_javascript     = True
    no_stylesheets        = True
    language              = 'es'
    encoding              = 'cp1252'
    masthead_url          = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
    extra_css             = ' h1{font-family: Georgia,serif} body{font-family: Arial,sans-serif} img{margin-top: 0.5em; margin-bottom: 0.2em} .notaEpigrafe{font-size: x-small} '
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    conversion_options = {
                          'comment'  : description
                        , 'tags'     : category
                        , 'publisher': publisher
                        , 'language' : language
                        }
    keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
    remove_tags = [
                     dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
                    ,dict(name='ul'  , attrs={'class':'cajaHerramientas cajaTop noprint'})
                    ,dict(name='div' , attrs={'class':'cajaHerramientas noprint'        })
                    ,dict(attrs={'class':['titulosMultimedia','derecha','techo color']})
                    ,dict(name=['iframe','embed','object'])
                  ]
    remove_attributes = ['height','width']
    feeds          = [
                         (u'Ultimas noticias'     , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2'         )
@ -51,10 +55,4 @@ class Lanacion(BasicNewsRecipe):
                     ]
    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
+        return self.adeify_images(soup)
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
    language = 'es'
--- a/resources/recipes/lescienze.recipe
+++ b/resources/recipes/lescienze.recipe
@ -0,0 +1,89 @@
 #!/usr/bin/env  python
 __license__     = 'GPL v3'
 __author__      = 'Lorenzo Vigentini'
 __copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
 __version__     = 'v1.01'
 __date__        = '10, January 2010'
 __description__ = 'Monthly Italian edition of Scientific American'
 '''
 http://lescienze.espresso.repubblica.it/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class leScienze(BasicNewsRecipe):
    # News recipe for Le Scienze (lescienze.espresso.repubblica.it), the
    # Italian edition of Scientific American.
    author        = 'Lorenzo Vigentini'
    # Recipe credit under the `__author__` name the other recipes use;
    # `author` above is kept for backward compatibility.
    __author__    = author
    description   = 'Monthly Italian edition of Scientific American'
    cover_url      = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif'
    title          = 'le Scienze'
    publisher      = 'Gruppo editoriale lEspresso'
    category       = 'Science, general interest'
    language       = 'it'
    encoding       = 'cp1252'
    timefmt        = '[%a, %d %b, %Y]'
    oldest_article        = 31
    max_articles_per_feed = 20
    use_embedded_content  = False
    # NOTE(review): possibly meant to be `recursions` (the spelling used by
    # another recipe in this change set); left unchanged because enabling it
    # would alter how many linked pages get downloaded -- confirm intent.
    recursion             = 10
    remove_javascript     = True
    no_stylesheets        = True
    # Selector for the article container.
    keep_only_tags     = [
                            dict(name='div', attrs={'class':'bigbox'})
                        ]
    # Selectors for elements dropped from the kept content.
    remove_tags        = [
                            dict(name='span',attrs={'class':'linkindice'}),
                            dict(name='div',attrs={'class':'box-commenti'}),
                            dict(name='div',attrs={'id':['rssdiv','blocco']})
                         ]
    remove_tags_after = [dict(name='div',attrs={'class':'box-commenti'})]
    # (section title, RSS URL) pairs.
    feeds          = [
                       (u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'),
                       (u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'),
                       (u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'),
                       (u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'),
                       (u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'),
                       (u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'),
                       (u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'),
                       (u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'),
                       (u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'),
                       (u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'),
                       (u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'),
                       (u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'),
                       (u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'),
                       (u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'),
                       (u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'),
                       (u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'),
                       (u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'),
                       (u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'),
                       (u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'),
                       (u'Scienze dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'),
                       (u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'),
                       (u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'),
                       (u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'),
                       (u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza')
                     ]
    extra_css = '''
                h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
                h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
                h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
                h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
                h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
                .occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;}
                .titolo {font-weight:bold;}
                .label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;}
                .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
                .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
                '''
--- a/resources/recipes/metro_montreal.recipe
+++ b/resources/recipes/metro_montreal.recipe
@ -4,21 +4,26 @@ class Metro_Montreal(BasicNewsRecipe):
    title          = u'M\xe9tro Montr\xe9al'
    __author__     = 'Jerry Clapperton'
-    description    = u'Le quotidien le plus branch\xe9 sur le monde'
+    description    = 'Le quotidien le plus branch\xe9 sur le monde'
-    language = 'fr'
+    language       = 'fr'
-    oldest_article = 7
+    oldest_article        = 7
    max_articles_per_feed = 20
    use_embedded_content  = False
-    remove_javascript = True
+    remove_javascript     = True
-    no_stylesheets = True
+    no_stylesheets        = True
-    encoding = 'utf-8'
+    encoding              = 'utf-8'
    extra_css             = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
-    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
+    remove_tags = [dict(attrs={'id':'buttons'})]
-    remove_tags = [dict(attrs={'id':'buttons'}), dict(name=['img', 'style'])]
+    feeds = [
-
+             (u"L'info", u'http://journalmetro.com/linfo/rss'),
-    feeds = [(u"L'info", u'http://journalmetro.com/linfo/rss'), (u'Monde', u'http://journalmetro.com/monde/rss'), (u'Culture', u'http://journalmetro.com/culture/rss'), (u'Sports', u'http://journalmetro.com/sports/rss'), (u'Paroles', u'http://journalmetro.com/paroles/rss')]
+             (u'Monde', u'http://journalmetro.com/monde/rss'),
             (u'Culture', u'http://journalmetro.com/culture/rss'),
             (u'Sports', u'http://journalmetro.com/sports/rss'),
             (u'Paroles', u'http://journalmetro.com/paroles/rss')
            ]
    def print_version(self, url):
        # Swap every 'article' in the URL for 'ArticlePrint', then append the
        # language=fr query string.
        print_url = url.replace('article', 'ArticlePrint')
        return print_url + '?language=fr'
--- a/resources/recipes/newsstraitstimes.recipe
+++ b/resources/recipes/newsstraitstimes.recipe
@ -0,0 +1,35 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.nst.com.my
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Newstraitstimes(BasicNewsRecipe):
    # News recipe for the New Straits Times, Malaysia (www.nst.com.my).
    title                 = 'New Straits Times from Malaysia'
    __author__            = 'Darko Miletic'
    description           = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
    publisher             = 'nst.com.my'
    category              = 'news, politics, Malaysia'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
    language              = 'en'
    masthead_url          = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'
    # Metadata forwarded to the conversion step; mirrors the attributes above.
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }
    remove_tags       = [dict(name=['link','table'])]
    # Must be a list of selector dicts, like remove_tags above; a bare dict
    # would be iterated by key ('name', 'attrs') instead of as one selector.
    keep_only_tags    = [dict(name='div',attrs={'id':'haidah'})]
    # (section title, RSS URL) pairs.
    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
--- a/resources/recipes/nin.recipe
+++ b/resources/recipes/nin.recipe
@ -72,9 +72,8 @@ class Nin(BasicNewsRecipe):
            section  = self.tag_to_string(item)
            feedlink = self.PREFIX + item['href']
            feedpage = self.index_to_soup(feedlink)
-            self.report_progress(0, _('Fetching feed')+' %s...'%(section))            
+            self.report_progress(0, _('Fetching feed')+' %s...'%(section))
            inarts   = []
            count2 = 0
            for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
                alink = art.parent
                url   = self.PREFIX + alink['href']
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
 '''
-import re
+import re, time
 from calibre import entity_to_unicode
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
                   dict(name=['script', 'noscript', 'style'])]
    encoding = decode
    no_stylesheets = True
-    extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
+    extra_css = 'h1 {font-face:sans-serif; font-size:2em; font-weight:bold;}\n.byline {font:monospace;}\n.bold {font-weight:bold;}'
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
--- a/resources/recipes/nytimesbook.recipe
+++ b/resources/recipes/nytimesbook.recipe
@ -0,0 +1,56 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class NewYorkTimesBookReview(BasicNewsRecipe):
    # Recipe for the New York Times Sunday Book Review RSS feed. Follow-up
    # pages of an article are reached through `recursions`/`match_regexps`.
    title          = u'New York Times Book Review'
    language       = 'en'
    __author__     = 'Krittika Goyal'
    oldest_article = 8 #days
    max_articles_per_feed = 1000
    recursions = 2
    #encoding = 'latin1'
    # The stylesheet switch is spelled `no_stylesheets` by every other recipe
    # in this changeset. The earlier `remove_stylesheets` spelling is not a
    # documented BasicNewsRecipe option, so it presumably had no effect.
    no_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after  = dict(name='div', attrs={'id':'authorId'})
    remove_tags = [
       dict(name='iframe'),
       dict(name=['div', 'a'], attrs={'class':['enlargeThis', 'jumpLink']}),
       dict(name='div', attrs={'id':['sidebarArticles', 'toolsRight']}),
       #dict(name='ul', attrs={'class':'article-tools'}),
       #dict(name='ul', attrs={'class':'articleTools'}),
    ]
    # Only links carrying a pagewanted=<2-9...> parameter are followed.
    match_regexps = [
            r'http://www.nytimes.com/.+pagewanted=[2-9]+'
            ]
    feeds          = [
 ('New York Times Sunday Book Review',
 'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
 ]
    def preprocess_html(self, soup):
        # Rebuild the page as a minimal document whose body holds only the
        # <div id="article"> element of the downloaded page.
        story = soup.find(name='div', attrs={'id':'article'})
        if story is None:
            # No article container on this page: return it unchanged rather
            # than inserting None into the fresh body below.
            return soup
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
    def postprocess_html(self, soup, first):
        # Drop every element with id "pageLinks"; on pages other than the
        # first also drop the first <h1> and the first "timestamp" element.
        for div in soup.findAll(id='pageLinks'):
            div.extract()
        if not first:
            h1 = soup.find('h1')
            if h1 is not None:
                h1.extract()
            t = soup.find(attrs={'class':'timestamp'})
            if t is not None:
                t.extract()
        return soup
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@ -1,13 +1,12 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 pagina12.com.ar
 '''
-import time
+import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class Pagina12(BasicNewsRecipe):
    title                 = 'Pagina - 12'
@ -16,13 +15,14 @@ class Pagina12(BasicNewsRecipe):
    publisher             = 'La Pagina S.A.'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
-    max_articles_per_feed = 100
+    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
    language              = 'es'
    remove_empty_feeds    = True
-    extra_css             = ' body{font-family: sans-serif} '
+    masthead_url          = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }  '
    conversion_options = {
                          'comment'   : description
@ -45,14 +45,24 @@ class Pagina12(BasicNewsRecipe):
             ,(u'NO'             , u'http://www.pagina12.com.ar/diario/rss/no.xml'          )
             ,(u'Las/12'         , u'http://www.pagina12.com.ar/diario/rss/las12.xml'       )
             ,(u'Soy'            , u'http://www.pagina12.com.ar/diario/rss/soy.xml'         )
-             ,(u'M2'             , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'      )
+             ,(u'Futuro'         , u'http://www.pagina12.com.ar/diario/rss/futuro.xml'      )
             ,(u'M2'             , u'http://www.pagina12.com.ar/diario/rss/m2.xml'          )
             ,(u'Rosario/12'     , u'http://www.pagina12.com.ar/diario/rss/rosario.xml'     )
            ]
    def print_version(self, url):
        return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
    def get_cover_url(self):
-        imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
+        rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
-        weekday = time.localtime().tm_wday
+        rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
-        return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
+        soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
        for image in soup.findAll('img',alt=True):
           if image['alt'].startswith('Tapa de la fecha'):
              return image['src']
        return None
    def preprocess_html(self, soup):
        # Remove the inline `style` attribute from every tag that has one,
        # then hand the same soup object back.
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
--- a/resources/recipes/people_us_mashup.recipe
+++ b/resources/recipes/people_us_mashup.recipe
@ -31,7 +31,7 @@ class PeopleMag(BasicNewsRecipe):
    keep_only_tags = [
-              dict(name='div', attrs={'class': 'panel_news_article_main'}),
+              dict(name='div', attrs={'class': 'panel_news_article_main'}), 	
 	        dict(name='div', attrs={'class':'article_content'}),
              dict(name='div', attrs={'class': 'headline'}),
              dict(name='div', attrs={'class': 'post'}),
@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe):
         dict(name='div', attrs={'class':'sharelinkcont'}),
         dict(name='div', attrs={'class':'categories'}),
         dict(name='ul', attrs={'class':'categories'}),
         dict(name='div', attrs={'class':'related_content'}),
         dict(name='div', attrs={'id':'promo'}),
         dict(name='div', attrs={'class':'linksWrapper'}),
         dict(name='p', attrs={'class':'tag tvnews'}),
--- a/resources/recipes/radikal_tr.recipe
+++ b/resources/recipes/radikal_tr.recipe
@ -0,0 +1,45 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 radikal.com.tr
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Radikal_tr(BasicNewsRecipe):
    # Recipe for the "Yazarlar" feed of radikal.com.tr. Each article is
    # fetched through the site's HaberYazdir page (see print_version).
    title = 'Radikal - Turkey'
    __author__ = 'Darko Miletic'
    description = 'News from Turkey'
    publisher = 'radikal'
    category = 'news, politics, Turkey'
    oldest_article = 2
    max_articles_per_feed = 150
    no_stylesheets = True
    encoding = 'cp1254'
    use_embedded_content = False
    masthead_url = 'http://www.radikal.com.tr/D/i/1/V2/radikal_logo.jpg'
    language = 'tr'
    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif } '
    # Metadata handed to the conversion step; reuses the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }
    # Page clean-up: the article is bracketed by the first <h1> and the
    # element with id "haberDetayYazi"; embedded/linked resources are removed.
    remove_tags = [dict(name=['embed', 'iframe', 'object', 'link', 'base'])]
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(attrs={'id': 'haberDetayYazi'})
    feeds = [(u'Yazarlar', u'http://www.radikal.com.tr/d/rss/RssYazarlar.xml')]
    def print_version(self, url):
        # Take whatever follows the last 'ArticleID=' in the URL and append
        # it to the HaberYazdir address.
        article_id = url.rpartition('ArticleID=')[-1]
        return 'http://www.radikal.com.tr/Default.aspx?aType=HaberYazdir&ArticleID=' + article_id
    def preprocess_html(self, soup):
        # Delegate image clean-up to the base-class helper.
        cleaned = self.adeify_images(soup)
        return cleaned
--- a/resources/recipes/readitlater.recipe
+++ b/resources/recipes/readitlater.recipe
@ -0,0 +1,64 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 readitlaterlist.com
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Readitlater(BasicNewsRecipe):
    # Recipe that turns the "unread" page of a readitlaterlist.com account
    # into a feed. The article list is scraped from the page's HTML
    # (<ul id="list">) in parse_index.
    title                 = 'Read It Later'
    __author__            = 'Darko Miletic'
    description           = '''Personalized news feeds. Go to readitlaterlist.com to
                               setup up your news. Fill in your account
                               username, and optionally you can add password.'''
    publisher             = 'readitlater.com'
    category              = 'news, custom'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    INDEX                 = u'http://readitlaterlist.com'
    LOGIN                 = INDEX + u'/l'
    feeds = [(u'Unread articles' , INDEX + u'/unread')]
    def get_browser(self):
        # Return a browser that has submitted the first form on the LOGIN
        # page when a username is configured. The password field is only
        # filled in when a password is set.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
               br['password'] = self.password
            br.submit()
        return br
    def parse_index(self):
        # Build the [(feed title, [article dict, ...]), ...] structure by
        # scraping every page listed in `feeds`.
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul',attrs={'id':'list'})
            if ritem is None:
                # The list container is missing, so there is nothing to
                # iterate. Report it and leave the feed empty instead of
                # failing with an AttributeError on None.
                self.log.warn('No article list found at %s'%feedurl)
                entries = []
            else:
                entries = ritem.findAll('li')
            for item in entries:
                description = ''
                atag = item.find('a',attrs={'class':'text'})
                if atag and atag.has_key('href'):
                    url         = self.INDEX + atag['href']
                    title       = self.tag_to_string(item.div)
                    # The page carries no per-article date here; the
                    # download time is used.
                    date        = strftime(self.timefmt)
                    articles.append({
                                      'title'      :title
                                     ,'date'       :date
                                     ,'url'        :url
                                     ,'description':description
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
--- a/resources/recipes/sueddeutschezeitung.recipe
+++ b/resources/recipes/sueddeutschezeitung.recipe
@ -0,0 +1,107 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.sueddeutsche.de/sz/
 '''
 import urllib
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class SueddeutcheZeitung(BasicNewsRecipe):
    # Recipe for the paid edition of the Sueddeutsche Zeitung. Each section
    # page under INDEX is scraped for its articles in parse_index, and the
    # print version of every article is downloaded.
    title                  = 'Sueddeutche Zeitung'
    __author__             = 'Darko Miletic'
    description            = 'News from Germany. Access to paid content.'
    publisher              = 'Sueddeutche Zeitung'
    category               = 'news, politics, Germany'
    no_stylesheets         = True
    oldest_article         = 2
    encoding               = 'cp1252'
    needs_subscription     = True
    remove_empty_feeds     = True
    PREFIX                 = 'http://www.sueddeutsche.de'
    # The date is formatted once, when the class body is executed.
    INDEX                  = PREFIX + strftime('/sz/%Y-%m-%d/')
    LOGIN                  = PREFIX + '/app/lbox/index.html'
    use_embedded_content   = False
    masthead_url           = 'http://pix.sueddeutsche.de/img/g_.gif'
    language               = 'de'
    extra_css              = ' body{font-family: Arial,Helvetica,sans-serif} '
    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }
    remove_attributes = ['height','width']
    def get_browser(self):
        # Open the index page, then POST the login form fields to LOGIN when
        # both a username and a password are configured.
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({ 'login_name':self.username
                                     ,'login_passwort':self.password
                                     ,'lboxaction':'doLogin'
                                     ,'passtxt':'Passwort'
                                     ,'referer':self.INDEX
                                     ,'x':'22'
                                     ,'y':'7'
                                   })
            br.open(self.LOGIN,data)
        return br
    remove_tags        =[
                         dict(attrs={'class':'hidePrint'})
                        ,dict(name=['link','object','embed','base','iframe'])
                        ]
    remove_tags_before = dict(name='h2')
    remove_tags_after  = dict(attrs={'class':'author'})
    feeds = [
               (u'Politik'      , INDEX + 'politik/'      )
              ,(u'Seite drei'   , INDEX + 'seitedrei/'    )
              ,(u'Meinungsseite', INDEX + 'meinungsseite/')
              ,(u'Wissen'       , INDEX + 'wissen/'       )
              ,(u'Panorama'     , INDEX + 'panorama/'     )
              ,(u'Feuilleton'   , INDEX + 'feuilleton/'   )
              ,(u'Medien'       , INDEX + 'medien/'       )
              ,(u'Wirtschaft'   , INDEX + 'wirtschaft/'   )
              ,(u'Sport'        , INDEX + 'sport/'        )
              ,(u'Bayern'       , INDEX + 'bayern/'       )
              ,(u'Muenchen'     , INDEX + 'muenchen/'     )
              ,(u'jetzt.de'     , INDEX + 'jetzt.de/'     )
            ]
    def parse_index(self):
        # Build the [(feed title, [article dict, ...]), ...] structure from
        # the "topthema" cells of each section page.
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            tbl = soup.find(attrs={'class':'szprintd'})
            # A section page without the "szprintd" container yields no
            # articles instead of an AttributeError on None.
            cells = tbl.findAll(name='td',attrs={'class':'topthema'}) if tbl is not None else []
            for item in cells:
                titel = item.find(attrs={'class':'Titel'})
                atag  = titel.a if titel is not None else None
                if atag is None or not atag.has_key('href'):
                    # No usable link in this cell; nothing to download.
                    continue
                ptag = item.find('p')
                if ptag is not None:
                    # Scripts inside the teaser would otherwise end up in
                    # the description text.
                    stag = ptag.find('script')
                    if stag:
                       stag.extract()
                    description = self.tag_to_string(ptag)
                else:
                    description = ''
                url           = self.PREFIX + atag['href']
                title         = self.tag_to_string(atag)
                articles.append({
                                      'title'      :title
                                     ,'date'       :strftime(self.timefmt)
                                     ,'url'        :url
                                     ,'description':description
                                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
    def print_version(self, url):
        # The print page is addressed by appending 'print.html' to the URL.
        return url + 'print.html'
--- a/resources/recipes/telegraph_uk.recipe
+++ b/resources/recipes/telegraph_uk.recipe
@ -9,8 +9,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TelegraphUK(BasicNewsRecipe):
    title                 = u'Telegraph.co.uk'
-    __author__            = 'Darko Miletic'
+    __author__            = 'Darko Miletic and Sujata Raman'
-    description           = 'News from United Kingdom'    
+    description           = 'News from United Kingdom'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
@ -18,23 +18,26 @@ class TelegraphUK(BasicNewsRecipe):
    use_embedded_content  = False
-    extra_css = '''
+    extra_css           = '''
-                h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
+                        h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
-                h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444}
+                        h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444;}
-                .story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
+                        .story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
-                .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+                        .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
-                a{color:#234B7B; }
+                        a{color:#234B7B; }
-                .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+                        .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
-                '''
+                        '''
-    
+
-    keep_only_tags    = [ 
+    keep_only_tags      = [
                           dict(name='div', attrs={'class':'storyHead'})
                          ,dict(name='div', attrs={'class':'story'    })
-                          #,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ]   }) 
+                          #,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ]   })
-                        ]
+                          ]
-    remove_tags    = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder"]})]
+    remove_tags         = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
-    
+                          #,dict(name='div', attrs={'class':['toolshideoneQuarter']})
-    feeds          = [
+                          ,dict(name='span', attrs={'class':['num','placeComment']})
                          ]
    feeds               = [
                         (u'UK News'        , u'http://www.telegraph.co.uk/news/uknews/rss'                                      )
                        ,(u'World News'     , u'http://www.telegraph.co.uk/news/worldnews/rss'                                   )
                        ,(u'Politics'       , u'http://www.telegraph.co.uk/news/newstopics/politics/rss'                         )
@ -45,15 +48,27 @@ class TelegraphUK(BasicNewsRecipe):
                        ,(u'Earth News'     , u'http://www.telegraph.co.uk/earth/earthnews/rss'                                  )
                        ,(u'Comment'        , u'http://www.telegraph.co.uk/comment/rss'                                          )
                        ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss'                     )
-                     ]
+                         ]
    def get_article_url(self, article):
-        
+
        url = article.get('guid', None)
-        
+
        if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
            url = None
-        
+
        return url
-   
+
    def postprocess_html(self,soup,first):
        # Inside every <div class="byline">, remove the <p> elements selected
        # by the getattr test below, then return the same soup.
        for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
            for pTag in bylineTag.findAll(name='p'):
                if not pTag.contents:
                    # An empty <p> has no first child to inspect; skip it
                    # instead of raising IndexError on contents[0].
                    continue
                # getattr falls back to True when the first child has no
                # `Comments` attribute, so such paragraphs are removed.
                # NOTE(review): presumably this targets paragraphs that start
                # with plain text rather than a tag; confirm against the
                # BeautifulSoup attribute lookup.
                if getattr(pTag.contents[0],"Comments",True):
                    pTag.extract()
        return soup
--- a/resources/recipes/the_gazette.recipe
+++ b/resources/recipes/the_gazette.recipe
@ -1,22 +0,0 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class The_Gazette(BasicNewsRecipe):
    # Recipe for The Gazette, built from the canada.com RSS feeds below.
    # NOTE(review): cover_url is a file:// path on a D: drive, so it only
    # resolves on a machine that has that file.
    cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
    title = u'The Gazette'
    __author__ = 'Jerry Clapperton'
    description = 'Montreal news in English'
    language = 'en_CA'
    oldest_article = 7
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    encoding = 'utf-8'
    # Only the story header and the first page container are kept.
    keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
    feeds = [
        (u'News', u'http://feeds.canada.com/canwest/F297'),
        (u'Opinion', u'http://feeds.canada.com/canwest/F7383'),
        (u'Arts', u'http://feeds.canada.com/canwest/F7366'),
        (u'Life', u'http://rss.canada.com/get/?F6934'),
        (u'Business', u'http://feeds.canada.com/canwest/F6939'),
        (u'Travel', u'http://rss.canada.com/get/?F6938'),
        (u'Health', u'http://feeds.canada.com/canwest/F7397'),
        (u'Technology', u'http://feeds.canada.com/canwest/F7411'),
    ]
--- a/resources/recipes/the_new_republic.recipe
+++ b/resources/recipes/the_new_republic.recipe
@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_tags = [
            dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
        ('Economy', 'http://www.tnr.com/rss/articles/Economy'),
        ('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
        ('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
-        ('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
+        ('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
        ('World', 'http://www.tnr.com/rss/articles/World'),
        ('Film', 'http://www.tnr.com/rss/articles/Film'),
        ('Books', 'http://www.tnr.com/rss/articles/books'),
        ('The Book', 'http://www.tnr.com/rss/book'),
        ('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
        ('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
        ('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
        ('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
        ('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
        ('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
        ('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
        ('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
    def print_version(self, url):
        # Map an article URL to its print URL by swapping the site root for
        # the '/print/' root. URLs without the site root come back unchanged.
        site_root  = 'http://www.tnr.com/'
        print_root = 'http://www.tnr.com/print/'
        return url.replace(site_root, print_root)
--- a/resources/recipes/theluminouslandscape.recipe
+++ b/resources/recipes/theluminouslandscape.recipe
@ -0,0 +1,37 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 luminous-landscape.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class theluminouslandscape(BasicNewsRecipe):
    # Recipe for luminous-landscape.com, built from the site's "What's new"
    # RSS feed using the content embedded in the feed itself.
    title = 'The Luminous Landscape'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
    publisher = 'The Luminous Landscape '
    category = 'news, blog, photograph, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    encoding = 'cp1252'
    language = 'en'
    # Metadata handed to the conversion step; reuses the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }
    feeds = [
        (u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php'),
    ]
    remove_tags = [dict(name=['object','link','iframe'])]
    def preprocess_html(self, soup):
        # Delegate image clean-up to the base-class helper.
        cleaned = self.adeify_images(soup)
        return cleaned
--- a/resources/recipes/theonlinephotographer.recipe
+++ b/resources/recipes/theonlinephotographer.recipe
@ -0,0 +1,41 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 theonlinephotographer.typepad.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class theonlinephotographer(BasicNewsRecipe):
    # Recipe for theonlinephotographer.typepad.com, built from its
    # FeedBurner feed; full articles are downloaded from the site.
    title = 'The Online Photographer'
    __author__ = 'Darko Miletic'
    description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
    publisher = 'The Online Photographer'
    category = 'news, blog, photograph, international'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'utf8'
    language = 'en'
    extra_css = ' body{font-family: Georgia,"Times New Roman",serif } '
    # Metadata handed to the conversion step; reuses the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }
    feeds = [
        (u'Articles', u'http://feeds.feedburner.com/typepad/ZSjz'),
    ]
    # The entry is bracketed by its header <h3> and footer <div>.
    remove_tags_before = dict(name='h3', attrs={'class':'entry-header'})
    remove_tags_after = dict(name='div', attrs={'class':'entry-footer'})
    remove_tags = [dict(name=['object','link','iframe'])]
    def preprocess_html(self, soup):
        # Delegate image clean-up to the base-class helper.
        cleaned = self.adeify_images(soup)
        return cleaned
--- a/resources/recipes/tidbits.recipe
+++ b/resources/recipes/tidbits.recipe
@ -0,0 +1,53 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 db.tidbits.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class TidBITS(BasicNewsRecipe):
    # Recipe for db.tidbits.com, one feed per site category, using the
    # content embedded in the feeds.
    title = 'TidBITS: Mac News for the Rest of Us'
    __author__ = 'Darko Miletic'
    description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
    publisher = 'TidBITS Publishing Inc.'
    category = 'news, Apple, Macintosh, IT, Internet'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = True
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'
    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '
    # Metadata handed to the conversion step; reuses the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }
    remove_attributes = ['width','height']
    # <small> elements are removed, and so is whatever follows the matched
    # <small> element.
    remove_tags = [dict(name='small')]
    remove_tags_after = dict(name='small')
    feeds = [
        (u'Business Apps', u'http://db.tidbits.com/feeds/business.rss'),
        (u'Entertainment', u'http://db.tidbits.com/feeds/entertainment.rss'),
        (u'External Links', u'http://db.tidbits.com/feeds/links.rss'),
        (u'Home Mac', u'http://db.tidbits.com/feeds/home.rss'),
        (u'Inside TidBITS', u'http://db.tidbits.com/feeds/inside.rss'),
        (u'iPod & iPhone', u'http://db.tidbits.com/feeds/ipod-iphone.rss'),
        (u'Just for Fun', u'http://db.tidbits.com/feeds/fun.rss'),
        (u'Macs & Mac OS X', u'http://db.tidbits.com/feeds/macs.rss'),
        (u'Media Creation', u'http://db.tidbits.com/feeds/creative.rss'),
        (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'),
        (u'Opinion & Editorial', u'http://db.tidbits.com/feeds/opinion.rss'),
        (u'Support & Problem Solving', u'http://db.tidbits.com/feeds/support.rss'),
        (u'Safe Computing', u'http://db.tidbits.com/feeds/security.rss'),
        (u'Tech News', u'http://db.tidbits.com/feeds/tech.rss'),
        (u'Software Watchlist', u'http://db.tidbits.com/feeds/watchlist.rss'),
    ]
--- a/resources/recipes/toronto_sun.recipe
+++ b/resources/recipes/toronto_sun.recipe
@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TorontoSun(BasicNewsRecipe):
    title                 = 'Toronto SUN'
-    __author__            = 'Darko Miletic'
+    __author__            = 'Darko Miletic and Sujata Raman'
    description           = 'News from Canada'
    publisher             = 'Toronto Sun'
    category              = 'news, politics, Canada'
@ -21,25 +21,50 @@ class TorontoSun(BasicNewsRecipe):
    encoding              = 'cp1252'
    language              = 'en_CA'
-    conversion_options = {
+    conversion_options  = {
-                          'comment'   : description
+                              'comment'   : description
-                        , 'tags'      : category
+                            , 'tags'      : category
-                        , 'publisher' : publisher
+                            , 'publisher' : publisher
-                        , 'language'  : language
+                            , 'language'  : language
-                        }
+                          }
-    keep_only_tags    =[
+    keep_only_tags      = [
-                         dict(name='div', attrs={'class':'articleHead'})
+                               dict(name='div', attrs={'class':['articleHead','leftBox']})
-                         ,dict(name='div', attrs={'id':'channelContent'})
+                              ,dict(name='div', attrs={'id':'channelContent'})
-                       ]
+                              ,dict(name='div', attrs={'id':'rotateBox'})
-    remove_tags = [
+                              ,dict(name='img')
-                      dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']})
+                          ]
-                     ,dict(name=['link','iframe','object'])
+    remove_tags         = [
-                     ,dict(name='a',attrs={'rel':'swap'})
+                              dict(name='div',attrs={'class':['bottomBox clear','bottomBox','breadCrumb','articleControls thin','articleControls thin short','extraVideoList']})
-                     ,dict(name='ul',attrs={'class':'tabs dl contentSwap'})
+                             ,dict(name='h2',attrs={'class':'microhead'})
-                  ]
+                             ,dict(name='div',attrs={'id':'commentsBottom'})
                             ,dict(name=['link','iframe','object'])
                             ,dict(name='a',attrs={'rel':'swap'})
                             ,dict(name='a',attrs={'href':'/news/haiti/'})
                             ,dict(name='ul',attrs={'class':['tabs dl contentSwap','micrositeNav clearIt hList','galleryNav rotateNav']})
                          ]
    # Tag specifications after which the rest of the page is discarded.
    # NOTE(review): remove_tags_after is assigned a second time further down
    # in this class body, as the single dict
    # dict(name='div',attrs={'class':'bottomBox clear'}). That later
    # assignment replaces this list, so the 'rotateBox' and 'contentSwap'
    # entries are never used as written. TODO confirm which value is intended.
    remove_tags_after   = [
                            dict(name='div',attrs={'class':'bottomBox clear'})
                           ,dict(name='div',attrs={'class':'rotateBox'})
                           ,dict(name='div',attrs={'id':'contentSwap'})
                          ]
    extra_css = '''
                h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
                h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
                h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
                p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
                .bold{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;color:#444444;margin-left: 0px;}
                .subheading{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000; font-weight: bold;}
                .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
                .byline span{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small; text-transform: uppercase;}
                .updated{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
                .galleryCaption{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
                .galleryUpdated{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
                '''
    remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'})
    feeds = [
              (u'News'       , u'http://www.torontosun.com/news/rss.xml'           )
@ -48,3 +73,19 @@ class TorontoSun(BasicNewsRecipe):
             ,(u'World'      , u'http://www.torontosun.com/news/world/rss.xml'     )
             ,(u'Money'      , u'http://www.torontosun.com/money/rss.xml'          )
            ]
    def preprocess_html(self, soup):
        """Rewrite proxied <img> tags to point directly at the real image.

        Images whose ``src`` carries a query string of the form
        ``?<key>=<image url>&<key>=<width>x<height>`` get their ``src``
        replaced by the embedded image URL and their ``width``/``height``
        attributes filled in from the last query parameter.

        Images whose ``src`` has no such query string, or whose last
        parameter is not a ``<width>x<height>`` pair, are left untouched
        (previously the latter raised IndexError after ``src`` had already
        been overwritten).

        :param soup: parsed page; only ``findAll`` is used on it.
        :return: the same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img', src=True):
            # Everything after the first '=' of the query string.
            query = img.get('src').split('?')[-1].partition('=')[-1]
            if not query:
                continue
            target = query.split('&')[0].partition('=')[0]
            dims = query.split('&')[-1].partition('=')[-1].split('x')
            if len(dims) < 2:
                # No WIDTHxHEIGHT value available: keep the original tag
                # rather than half-rewriting it.
                continue
            img['src'] = target
            img['width'] = dims[0]
            img['height'] = dims[1]
        return soup
--- a/resources/recipes/tuttosport.recipe
+++ b/resources/recipes/tuttosport.recipe
@ -0,0 +1,66 @@
 #!/usr/bin/env  python
 __license__     = 'GPL v3'
 __author__      = 'Lorenzo Vigentini'
 __copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
 __version__     = 'v1.01'
 __date__        = '30, January 2010'
 __description__ = 'Sport daily news from Italy'
 '''www.tuttosport.com'''
 from calibre.web.feeds.news import BasicNewsRecipe
 class tuttosport(BasicNewsRecipe):
    # Recipe for the Italian sports daily www.tuttosport.com.
    author        = 'Lorenzo Vigentini'
    description   = 'Sport daily news from Italy'
    cover_url      = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png'
    title          = 'Tuttosport'
    publisher      = 'Nuova Editoriale Sportiva S.r.l'
    category       = 'Sport News'
    language       = 'it'
    timefmt        = '[%a, %d %b, %Y]'
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content  = False
    # NOTE(review): other recipes spell this option `recursions`; confirm
    # whether this attribute has any effect before renaming it.
    recursion             = 10
    remove_javascript = True
    no_stylesheets = True
    def print_version(self,url):
        # Keep at most the first ten '/'-separated segments of the article
        # URL and request the print layout via the '?print' query string.
        segments = url.split('/')
        printURL = '/'.join(segments[0:10]) + '?print'
        return printURL
    # Only the headline, the lead image, the abstract and the article body
    # are kept.
    keep_only_tags = [
                    dict(name='h2', attrs={'class':'tit_Article'}),
                    dict(name='div', attrs={'class':['box_Img img_L ','txt_ArticleAbstract','txt_Article txtBox_cms']})
                      ]
    feeds       = [
                   (u'Primo piano',u'http://www.tuttosport.com/rss/primo_piano.xml'),
                   (u'Cronaca',u'http://www.tuttosport.com/rss/Cronaca-205.xml'),
                   (u'Lettere al direttore',u'http://blog.tuttosport.com/direttore/feed'),
                   (u'Calcio',u'http://www.tuttosport.com/rss/Calcio-3.xml'),
                   (u'Speciale Derby',u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'),
                   (u'Formula 1',u'http://www.tuttosport.com/rss/Formula-1-7.xml'),
                   (u'Moto',u'http://www.tuttosport.com/rss/Moto-8.xml'),
                   (u'Basket',u'http://www.tuttosport.com/rss/Basket-9.xml'),
                   (u'Altri Sport',u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'),
                   (u'Tuttosport League',u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'),
                   (u'Scommesse',u'http://www.tuttosport.com/rss/Scommesse-286.xml')
                  ]
    extra_css = '''
                body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;}
                h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
                h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
                h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;}
                .txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;}
                .txt_Article {clear: both; margin: 8px 8px 12px;}
                .txt_Author {float: right;}
                .txt_ArticleAuthor {clear: both; margin: 8px;}
                '''
--- a/resources/recipes/variety.recipe
+++ b/resources/recipes/variety.recipe
@ -1,7 +1,5 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.variety.com
 '''
@ -20,8 +18,10 @@ class Variety(BasicNewsRecipe):
    publisher              = 'Red Business Information'
    category               = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
    language               = 'en'
    masthead_url           = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
    extra_css              = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
-    conversion_options = {  
+    conversion_options = {
                             'comments'  : description
                            ,'tags'      : category
                            ,'language'  : language
@ -31,7 +31,7 @@ class Variety(BasicNewsRecipe):
    remove_tags = [dict(name=['object','link','map'])]
    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
-                  
+
    feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
    def print_version(self, url):
@ -41,6 +41,6 @@ class Variety(BasicNewsRecipe):
        catid = catidr.partition('&')[0]
        return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
    def get_article_url(self, article):
        # Use the link stored under 'feedburner_origlink' in the feed entry;
        # the result is None when the entry has no such field.
        origlink_key = 'feedburner_origlink'
        return article.get(origlink_key)
    def preprocess_html(self, soup):
        # Hand the parsed page to adeify_images and return whatever it
        # gives back.
        processed = self.adeify_images(soup)
        return processed
--- a/resources/recipes/winter_olympics.recipe
+++ b/resources/recipes/winter_olympics.recipe
@ -0,0 +1,95 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Starson17'
 '''
 www.nbcolympics.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Olympics_2010(BasicNewsRecipe):
    # Recipe for the NBC coverage of the 2010 Winter Olympics
    # (www.nbcolympics.com).
    title          = u'NBC Olympics 2010'
    __author__  = 'Starson17'
    description = 'Olympics 2010'
    cover_url     = 'http://www.digitaljournal.com/img/1/1/2/1/i/4/7/6/o/WinterOlympics2010-logo.jpg'
    publisher      = 'Olympics 2010'
    tags           = 'Olympics news'
    language = 'en'
    use_embedded_content    = False
    no_stylesheets        = True
    remove_javascript = True
    # recursions = 3
    oldest_article        = 7
    max_articles_per_feed = 10
    keep_only_tags = [dict(name='div', attrs={'class':['Article ','ArticleGallery']}),
                      ]
    remove_tags = [dict(name='div', attrs={'id':['RelatedTagsBox','ShareBox']}),
                   dict(name='div', attrs={'class':['DateUtilities','PhotoGallery BoxRight','Frame','ToolBox']}),
                   ]
    # RSS feeds are at: http://www.nbcolympics.com/rss/index.html
    # Per-sport feeds are addressed by a two-letter code (sport=XX). Only the
    # "Latest News" feed of each sport is enabled; the others are kept as
    # comments so they can be switched on easily.
    feeds = [
             ('NBCOlympics.com - News', 'http://www.nbcolympics.com/rss/newscenter/mostpopular.xml'),
             ('NBCOlympics.com - News - Top Stories', 'http://www.nbcolympics.com/rss/newscenter/topstories.xml'),
             ('NBCOlympics.com - News - Latest Headlines', 'http://www.nbcolympics.com/rss/newscenter/latestnews.xml'),
             # ('NBCOlympics.com - Photos', 'http://www.nbcolympics.com/rss/photos/mostpopular.xml'),
             # ('NBCOlympics.com - Photos - Editorial Picks', 'http://www.nbcolympics.com/rss/photos/editorialpicks.xml'),
             # ('NBCOlympics.com - Photos - Latest Slideshows', 'http://www.nbcolympics.com/rss/photos/latestslideshows.xml'),
             ('NBCOlympics.com - Team USA - Latest news', 'http://www.nbcolympics.com/rss/countries/team-usa/index.xml'),
             # ('NBCOlympics.com - Team USA - Latest Slideshows', 'http://www.nbcolympics.com/rss/countries/team-usa/photos/index.xml'),
             # ('NBCOlympics.com - Team USA - Video', 'http://www.nbcolympics.com/rss/countries/team-usa/video/index.xml'),
             # ('NBCOlympics.com - Alpine Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=AS/mostpopular.xml'),
             # ('NBCOlympics.com - Alpine Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=AS/topnews.xml'),
             ('NBCOlympics.com - Alpine Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=AS/latestnews.xml'),
             # ('NBCOlympics.com - Biathlon - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BT/mostpopular.xml'),
             # ('NBCOlympics.com - Biathlon - Top News', 'http://www.nbcolympics.com/rss/sport=BT/topnews.xml'),
             ('NBCOlympics.com - Biathlon - Latest News', 'http://www.nbcolympics.com/rss/sport=BT/latestnews.xml'),
             # ('NBCOlympics.com - Bobsled - Most Popular News', 'http://www.nbcolympics.com/rss/sport=BS/mostpopular.xml'),
             # ('NBCOlympics.com - Bobsled - Top News', 'http://www.nbcolympics.com/rss/sport=BS/topnews.xml'),
             ('NBCOlympics.com - Bobsled - Latest News', 'http://www.nbcolympics.com/rss/sport=BS/latestnews.xml'),
             # ('NBCOlympics.com - Cross-Country - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CC/mostpopular.xml'),
             # ('NBCOlympics.com - Cross-Country - Top News', 'http://www.nbcolympics.com/rss/sport=CC/topnews.xml'),
             ('NBCOlympics.com - Cross-Country - Latest News', 'http://www.nbcolympics.com/rss/sport=CC/latestnews.xml'),
             # ('NBCOlympics.com - Curling - Most Popular News', 'http://www.nbcolympics.com/rss/sport=CU/mostpopular.xml'),
             # ('NBCOlympics.com - Curling - Top News', 'http://www.nbcolympics.com/rss/sport=CU/topnews.xml'),
             ('NBCOlympics.com - Curling - Latest News', 'http://www.nbcolympics.com/rss/sport=CU/latestnews.xml'),
             # ('NBCOlympics.com - Figure Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FS/mostpopular.xml'),
             # ('NBCOlympics.com - Figure Skating - Top News', 'http://www.nbcolympics.com/rss/sport=FS/topnews.xml'),
             ('NBCOlympics.com - Figure Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=FS/latestnews.xml'),
             # ('NBCOlympics.com - Freestyle Skiing - Most Popular News', 'http://www.nbcolympics.com/rss/sport=FR/mostpopular.xml'),
             # ('NBCOlympics.com - Freestyle Skiing - Top News', 'http://www.nbcolympics.com/rss/sport=FR/topnews.xml'),
             ('NBCOlympics.com - Freestyle Skiing - Latest News', 'http://www.nbcolympics.com/rss/sport=FR/latestnews.xml'),
             # ('NBCOlympics.com - Hockey - Most Popular News', 'http://www.nbcolympics.com/rss/sport=IH/mostpopular.xml'),
             # ('NBCOlympics.com - Hockey - Top News', 'http://www.nbcolympics.com/rss/sport=IH/topnews.xml'),
             ('NBCOlympics.com - Hockey - Latest News', 'http://www.nbcolympics.com/rss/sport=IH/latestnews.xml'),
             # ('NBCOlympics.com - Luge - Most Popular News', 'http://www.nbcolympics.com/rss/sport=LG/mostpopular.xml'),
             # ('NBCOlympics.com - Luge - Top News', 'http://www.nbcolympics.com/rss/sport=LG/topnews.xml'),
             ('NBCOlympics.com - Luge - Latest News', 'http://www.nbcolympics.com/rss/sport=LG/latestnews.xml'),
             # ('NBCOlympics.com - Nordic Combined - Most Popular News', 'http://www.nbcolympics.com/rss/sport=NC/mostpopular.xml'),
             # ('NBCOlympics.com - Nordic Combined - Top News', 'http://www.nbcolympics.com/rss/sport=NC/topnews.xml'),
             ('NBCOlympics.com - Nordic Combined - Latest News', 'http://www.nbcolympics.com/rss/sport=NC/latestnews.xml'),
             # ('NBCOlympics.com - Short Track - Most Popular News', 'http://www.nbcolympics.com/rss/sport=ST/mostpopular.xml'),
             # ('NBCOlympics.com - Short Track - Top News', 'http://www.nbcolympics.com/rss/sport=ST/topnews.xml'),
             ('NBCOlympics.com - Short Track - Latest News', 'http://www.nbcolympics.com/rss/sport=ST/latestnews.xml'),
             # ('NBCOlympics.com - Skeleton - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SN/mostpopular.xml'),
             # ('NBCOlympics.com - Skeleton - Top News', 'http://www.nbcolympics.com/rss/sport=SN/topnews.xml'),
             ('NBCOlympics.com - Skeleton - Latest News', 'http://www.nbcolympics.com/rss/sport=SN/latestnews.xml'),
             # ('NBCOlympics.com - Ski Jumping - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SJ/mostpopular.xml'),
             # ('NBCOlympics.com - Ski Jumping - Top News', 'http://www.nbcolympics.com/rss/sport=SJ/topnews.xml'),
             ('NBCOlympics.com - Ski Jumping - Latest News', 'http://www.nbcolympics.com/rss/sport=SJ/latestnews.xml'),
             # ('NBCOlympics.com - Snowboarding - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SB/mostpopular.xml'),
             # ('NBCOlympics.com - Snowboarding - Top News', 'http://www.nbcolympics.com/rss/sport=SB/topnews.xml'),
             ('NBCOlympics.com - Snowboarding - Latest News', 'http://www.nbcolympics.com/rss/sport=SB/latestnews.xml'),
             # Speed Skating previously reused the Alpine Skiing code (AS),
             # which duplicated that feed.
             # NOTE(review): SS is assumed to be the site's code for Speed
             # Skating; confirm against the RSS index page.
             # ('NBCOlympics.com - Speed Skating - Most Popular News', 'http://www.nbcolympics.com/rss/sport=SS/mostpopular.xml'),
             # ('NBCOlympics.com - Speed Skating - Top News', 'http://www.nbcolympics.com/rss/sport=SS/topnews.xml'),
             ('NBCOlympics.com - Speed Skating - Latest News', 'http://www.nbcolympics.com/rss/sport=SS/latestnews.xml'),
             ]
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
--- a/resources/recipes/wired.recipe
+++ b/resources/recipes/wired.recipe
@ -20,6 +20,7 @@ class Wired(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    masthead_url          = 'http://www.wired.com/images/home/wired_logo.gif'
    language              = 'en'
    extra_css             = ' body{font-family: sans-serif} .entryDescription li {display: inline; list-style-type: none} '
    index                 = 'http://www.wired.com/magazine/'
@ -38,14 +39,34 @@ class Wired(BasicNewsRecipe):
                     dict(name=['object','embed','iframe','link'])
                    ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
                  ]
    remove_attributes = ['height','width']              
    #feeds = [(u'Articles' , u'http://www.wired.com/magazine/feed/' )]
    def parse_index(self):
        totalfeeds = []
-        soup = self.index_to_soup(self.index)
+        soup   = self.index_to_soup(self.index)
        majorf = soup.find('div',attrs={'class':'index'})
        if majorf:
           pfarticles = []
           firsta = majorf.find(attrs={'class':'spread-header'})
           if firsta:
              pfarticles.append({
                                  'title'      :self.tag_to_string(firsta.a)
                                 ,'date'       :strftime(self.timefmt)
                                 ,'url'        :'http://www.wired.com' + firsta.a['href']
                                 ,'description':''
                                })
           for itt in majorf.findAll('li'):
               itema = itt.find('a',href=True)
               if itema:
                  pfarticles.append({
                                      'title'      :self.tag_to_string(itema)
                                     ,'date'       :strftime(self.timefmt)
                                     ,'url'        :'http://www.wired.com' + itema['href']
                                     ,'description':''
                                    })
           totalfeeds.append(('Cover', pfarticles))
        features = soup.find('div',attrs={'id':'my-glider'})
        if features:
           farticles = []
--- a/resources/recipes/wired_daily.recipe
+++ b/resources/recipes/wired_daily.recipe
@ -0,0 +1,44 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __docformat__ = 'restructuredtext en'
 from calibre.web.feeds.news import BasicNewsRecipe
 class Wired_Daily(BasicNewsRecipe):
    # Recipe for the daily Wired feeds; articles are fetched through
    # print_version() below.
    title       = 'Wired Daily Edition'
    __author__  = 'Kovid Goyal'
    description = 'Technology news'
    language    = 'en'
    timefmt     = ' [%Y%b%d  %H%M]'
    no_stylesheets = True

    # Everything before the <div id="content"> element is dropped.
    remove_tags_before = {'name': 'div', 'id': 'content'}

    # Elements stripped from each article, matched by id, class or tag name.
    remove_tags = [
        dict(id=[
            'social_tools', 'outerWrapper', 'sidebar', 'footer',
            'advertisement', 'blog_subscription_unit',
            'brightcove_component',
        ]),
        {'class':'entryActions'},
        dict(name=['noscript', 'script']),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Top News',      'http://feeds.wired.com/wired/index'),
        ('Culture',       'http://feeds.wired.com/wired/culture'),
        ('Software',      'http://feeds.wired.com/wired/software'),
        ('Mac',           'http://feeds.feedburner.com/cultofmac/bFow'),
        ('Gadgets',       'http://feeds.wired.com/wired/gadgets'),
        ('Cars',          'http://feeds.wired.com/wired/cars'),
        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
        ('Gaming',        'http://feeds.wired.com/wired/gaming'),
        ('Science',       'http://feeds.wired.com/wired/science'),
        ('Med Tech',      'http://feeds.wired.com/wired/medtech'),
        ('Politics',      'http://feeds.wired.com/wired/politics'),
        ('Tech Biz',      'http://feeds.wired.com/wired/techbiz'),
        ('Commentary',    'http://feeds.wired.com/wired/commentary'),
        ]

    def print_version(self, url):
        # Insert 'print/' right after the site root of the article URL.
        site_root = 'http://www.wired.com/'
        print_root = 'http://www.wired.com/print/'
        return url.replace(site_root, print_root)
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
                # first, check if there is an h3 tag which provides a section name
                stag = divtag.find('h3')
                if stag:
-                    if stag.parent['class'] == 'dynamic':
+                    if stag.parent.get('class', '') == 'dynamic':
                        # a carousel of articles is too complex to extract a section name
                        # for each article, so we'll just call the section "Carousel"
                        section_name = 'Carousel'
--- a/resources/tanea.recipe
+++ b/resources/tanea.recipe
@ -0,0 +1,30 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 class TaNea(BasicNewsRecipe):
    # Recipe for the Greek newspaper Ta Nea (www.tanea.gr).
    title          = u'Ta Nea'
    __author__             = 'Pan'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets         = True
    # Keep only what lies between <div id="print-body"> and <div id="text">.
    remove_tags_before = dict(name='div',attrs={'id':'print-body'})
    remove_tags_after = dict(name='div',attrs={'id':'text'})
    # Section titles are Greek text written with \uXXXX escapes. They were
    # previously UTF-8 byte escapes (\xce\x95...) inside unicode literals,
    # which produced two Latin-1 characters per Greek letter.
    feeds = [
        # Greece
        (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1',
        u'http://www.tanea.gr/default.asp?pid=66&la=1'),
        # World
        (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2',
        u'http://www.tanea.gr/default.asp?pid=67&la=1'),
        # Economy
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1',
        u'http://www.tanea.gr/default.asp?pid=68&la=1'),
        # Culture
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2',
        u'http://www.tanea.gr/default.asp?pid=69&la=1'),
        # Opinions
        (u'\u0393\u03bd\u03ce\u03bc\u03b5\u03c2',
        u'http://www.tanea.gr/default.asp?pid=79&la=1'),
        (u'\u03a1\u03b9\u03c0\u03ad\u03c2',
        u'http://www.tanea.gr/default.asp?pid=80&la=1'),
        (u'\u0391\u03b9\u03c7\u03bc\u03ad\u03c2',
        u'http://www.tanea.gr/default.asp?pid=81&la=1')]
    def print_version(self, url):
        # Article pages (pid=2) are swapped for the pid=96 page; judging by
        # the method name this is the printable layout.
        return url.replace('http://www.tanea.gr/default.asp?pid=2', 'http://www.tanea.gr/default.asp?pid=96')
--- a/resources/viewer/bookmarks.js
+++ b/resources/viewer/bookmarks.js
@ -20,37 +20,8 @@ function selector(elem) {
    return sel;
 }
-function find_closest_enclosing_block(top) {
+function calculate_bookmark(y, node) {
-    var START = top-1000;
+    var elem = $(node);
    var STOP = top;
    var matches = [];
    var elem, temp;
    var width = 1000;
    for (y = START; y < STOP; y += 20) {
        for ( x = 0; x < width; x += 20) {
            elem = document.elementFromPoint(x, y);
            try {
                elem = $(elem);
                temp = elem.offset().top
                matches.push(elem);
                if (Math.abs(temp - START) < 25) { y = STOP; break}
            } catch(error) {}
        }
    }
    var miny = Math.abs(matches[0].offset().top - START), min_elem = matches[0];
    for (i = 1; i < matches.length; i++) {
        elem = matches[i];
        temp = Math.abs(elem.offset().top - START);
        if ( temp < miny ) { miny = temp; min_elem = elem; }
    }
    return min_elem;
 }
 function calculate_bookmark(y) {
    var elem = find_closest_enclosing_block(y);
    var sel = selector(elem);
    var ratio = (y - elem.offset().top)/elem.height();
    if (ratio > 1) { ratio = 1; }
--- a/setup/extensions.py
+++ b/setup/extensions.py
@ -399,7 +399,7 @@ class BuildPDF2XML(Command):
            objects.append(obj)
        if self.newer(dest, objects):
-            cmd = ['g++', '-g', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
+            cmd = ['g++', '-ggdb', '-o', dest]+objects+['-lpoppler', '-lMagickWand',
            '-lpng', '-lpthread']
            if iswindows:
                cmd = [msvc.linker] + '/INCREMENTAL:NO /DEBUG /NODEFAULTLIB:libcmt.lib'.split()
--- a/setup/install.py
+++ b/setup/install.py
@ -137,8 +137,20 @@ class Develop(Command):
        self.setup_mount_helper()
        self.install_files()
        self.run_postinstall()
        self.install_env_module()
        self.success()
    def install_env_module(self):
        # Write init_calibre.py into the Python library directory that
        # distutils reports for the staging root. If that directory does
        # not exist, only a warning is emitted.
        from distutils import sysconfig
        libdir = sysconfig.get_python_lib(prefix=self.opts.staging_root)
        if not os.path.exists(libdir):
            self.warn('Cannot install calibre environment module to: '+libdir)
            return
        path = os.path.join(libdir, 'init_calibre.py')
        self.info('Installing calibre environment module: '+path)
        with open(path, 'wb') as f:
            # HEADER is filled in with the same values the launcher
            # templates receive.
            f.write(HEADER.format(**self.template_args()))
    def setup_mount_helper(self):
        def warn():
            self.warn('Failed to compile mount helper. Auto mounting of',
@ -180,13 +192,20 @@ class Develop(Command):
                    functions[typ]):
                self.write_template(name, mod, func)
    def template_args(self):
        # Values substituted into the generated script templates: the
        # install locations plus the plugins directory below libdir.
        plugins_dir = self.j(self.libdir, 'calibre', 'plugins')
        return dict(
            path=self.libdir,
            resources=self.sharedir,
            executables=self.bindir,
            extensions=plugins_dir,
        )
    def write_template(self, name, mod, func):
        template = COMPLETE_TEMPLATE if name == 'calibre-complete' else TEMPLATE
-        script = template.format(
+        args = self.template_args()
-                module=mod, func=func,
+        args['module'] = mod
-                path=self.libdir, resources=self.sharedir,
+        args['func'] = func
-                executables=self.bindir,
+        script = template.format(**args)
                extensions=self.j(self.libdir, 'calibre', 'plugins'))
        path = self.j(self.staging_bindir, name)
        if not os.path.exists(self.staging_bindir):
            os.makedirs(self.staging_bindir)
--- a/setup/installer/init.py
+++ b/setup/installer/init.py
@ -15,7 +15,7 @@ class Rsync(Command):
    description = 'Sync source tree from development machine'
-    SYNC_CMD = ('rsync -avz --exclude src/calibre/plugins '
+    SYNC_CMD = ('rsync -avz --delete --exclude src/calibre/plugins '
               '--exclude src/calibre/manual --exclude src/calibre/trac '
               '--exclude .bzr --exclude .build --exclude .svn --exclude build --exclude dist '
               '--exclude "*.pyc" --exclude "*.pyo" --exclude "*.swp" --exclude "*.swo" '
--- a/setup/resources.py
+++ b/setup/resources.py
@ -48,7 +48,9 @@ class Resources(Command):
        dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
        if self.newer(dest, files):
            self.info('\tCreating builtin_recipes.xml')
-            open(dest, 'wb').write(serialize_builtin_recipes())
+            xml = serialize_builtin_recipes()
            with open(dest, 'wb') as f:
                f.write(xml)
        dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
        files = []
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -378,10 +378,11 @@ def strftime(fmt, t=None):
        t = time.localtime()
    early_year = t[0] < 1900
    if early_year:
        replacement = 1900 if t[0]%4 == 0 else 1901
        fmt = fmt.replace('%Y', '_early year hack##')
        t = list(t)
        orig_year = t[0]
-        t[0] = 1900
+        t[0] = replacement
    ans = None
    if iswindows:
        if isinstance(fmt, unicode):
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.6.36'
+__version__   = '0.6.40'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 import re
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -7,6 +7,7 @@ import os
 import glob
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract
 class HTML2ZIP(FileTypePlugin):
    name = 'HTML to ZIP'
@ -416,9 +417,10 @@ from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX
 from calibre.devices.blackberry.driver import BLACKBERRY
 from calibre.devices.cybook.driver import CYBOOK
 from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
-                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK
+                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
                BOOQ
 from calibre.devices.iliad.driver import ILIAD
-from calibre.devices.irexdr.driver import IREXDR1000
+from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 from calibre.devices.jetbook.driver import JETBOOK
 from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
 from calibre.devices.nook.driver import NOOK
@ -430,11 +432,11 @@ from calibre.devices.eslick.driver import ESLICK
 from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
 from calibre.devices.binatone.driver import README
-from calibre.devices.hanvon.driver import N516
+from calibre.devices.hanvon.driver import N516, EB511
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
 from calibre.library.catalog import CSV_XML, EPUB_MOBI
-plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
+plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, CSV_XML, EPUB_MOBI]
 plugins += [
    ComicInput,
    EPUBInput,
@ -477,6 +479,7 @@ plugins += [
    CYBOOK,
    ILIAD,
    IREXDR1000,
    IREXDR800,
    JETBOOK,
    SHINEBOOK,
    POCKETBOOK360,
@ -500,9 +503,11 @@ plugins += [
    DBOOK,
    INVESBOOK,
    BOOX,
    BOOQ,
    EB600,
    README,
    N516,
    EB511,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -20,7 +20,7 @@ class ANDROID(USBMS):
    VENDOR_ID   = {
            0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
            0x22b8 : { 0x41d9 : [0x216]},
-            0x18d1 : { 0x4e11 : [0x0100]},
+            0x18d1 : { 0x4e11 : [0x0100], 0x4e12: [0x0100]},
            }
    EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
    EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -184,3 +184,14 @@ class INVESBOOK(EB600):
    VENDOR_NAME = 'INVES_E6'
    WINDOWS_MAIN_MEM = '00INVES_E600'
    WINDOWS_CARD_A_MEM = '00INVES_E600'
 class BOOQ(EB600):
    # Booq reader: an EB600 variant with its own names, format list and
    # vendor string.
    name     = 'Booq Device Interface'
    gui_name = 'Booq'

    FORMATS = [
        'epub', 'mobi', 'prc', 'fb2', 'pdf', 'doc', 'rtf', 'txt', 'html',
    ]

    VENDOR_NAME = 'NETRONIX'
    # Main memory and card A use the same identification string.
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EB600'
--- a/src/calibre/devices/hanlin/driver.py
+++ b/src/calibre/devices/hanlin/driver.py
@ -126,3 +126,15 @@ class BOOX(HANLINV3):
    EBOOK_DIR_MAIN = 'MyBooks'
    EBOOK_DIR_CARD_A = 'MyBooks'
    def windows_sort_drives(self, drives):
        # Make sure 'main' holds the smaller of the two drive values: when
        # both entries are present and the card sorts before main memory,
        # the two are exchanged. The dict is modified in place and returned.
        main_drive, card_drive = drives.get('main'), drives.get('carda')
        both_present = bool(card_drive and main_drive)
        if both_present and card_drive < main_drive:
            drives['main'], drives['carda'] = card_drive, main_drive
        return drives
--- a/src/calibre/devices/hanvon/driver.py
+++ b/src/calibre/devices/hanvon/driver.py
@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Device driver for Hanvon devices
 '''
 import re
 from calibre.devices.usbms.driver import USBMS
@ -32,3 +33,25 @@ class N516(USBMS):
    EBOOK_DIR_MAIN = 'e_book'
    SUPPORTS_SUB_DIRS = True
 class EB511(USBMS):
    # Driver description shown to the user.
    name        = 'Elonex EB 511 driver'
    gui_name    = 'EB 511'
    description = _('Communicate with the Elonex EB 511 eBook reader.')
    author      = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    # Formats offered for this device.
    FORMATS = ['epub', 'html', 'pdf', 'txt']

    # USB identification values.
    VENDOR_ID  = [0x45e]
    PRODUCT_ID = [0xffff]
    BCD        = [0x0]

    # Storage layout on the device.
    MAIN_MEMORY_VOLUME_LABEL = 'EB 511 Internal Memory'
    EBOOK_DIR_MAIN    = 'e_book'
    SUPPORTS_SUB_DIRS = True

    # Pattern for the main memory volume on OS X.
    OSX_MAIN_MEM_VOL_PAT = re.compile(r'/eReader')
--- a/src/calibre/devices/irexdr/driver.py
+++ b/src/calibre/devices/irexdr/driver.py
@ -36,3 +36,14 @@ class IREXDR1000(USBMS):
    EBOOK_DIR_MAIN = 'ebooks'
    DELETE_EXTS = ['.mbp']
    SUPPORTS_SUB_DIRS = True
 class IREXDR800(IREXDR1000):
    # DR800 variant of the DR1000 driver; only the attributes that differ
    # are overridden here.
    name        = 'IRex Digital Reader 800 Device Interface'
    description = _('Communicate with the IRex Digital Reader 800')

    # Device identification.
    PRODUCT_ID       = [0x002]
    WINDOWS_MAIN_MEM = 'DR800'

    # Formats and on-device book directory.
    FORMATS        = ['epub', 'html', 'pdf', 'txt']
    EBOOK_DIR_MAIN = 'Books'

    # No companion file extensions are listed for deletion.
    DELETE_EXTS = []
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -192,17 +192,15 @@ class PRS505(CLI, Device):
        fix_ids(*booklists)
        if not os.path.exists(self._main_prefix):
            os.makedirs(self._main_prefix)
-        f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
+        with open(self._main_prefix + self.__class__.MEDIA_XML, 'wb') as f:
-        booklists[0].write(f)
+            booklists[0].write(f)
        f.close()
        def write_card_prefix(prefix, listid):
            if prefix is not None and hasattr(booklists[listid], 'write'):
                if not os.path.exists(prefix):
                    os.makedirs(prefix)
-                f = open(prefix + self.__class__.CACHE_XML, 'wb')
+                with open(prefix + self.__class__.CACHE_XML, 'wb') as f:
-                booklists[listid].write(f)
+                    booklists[listid].write(f)
                f.close()
        write_card_prefix(self._card_a_prefix, 1)
        write_card_prefix(self._card_b_prefix, 2)
--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -70,6 +70,19 @@ def extract_cover_from_embedded_svg(html, base, log):
        if href and os.access(path, os.R_OK):
            return open(path, 'rb').read()
 def extract_calibre_cover(raw, base, log):
    '''
    Return the bytes of the single cover image referenced by an HTML page.

    The page qualifies only when it contains none of the text-bearing tags
    listed below and exactly one ``<img>`` whose ``alt`` attribute is
    ``'cover'``.

    :param raw: HTML markup of the candidate cover page.
    :param base: Directory against which the image ``src`` is resolved.
    :param log: Logger; accepted for signature compatibility, not used here.
    :return: The image file contents, or ``None`` when the page does not
        qualify or the image file does not exist.
    '''
    from calibre.ebooks.BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(raw)
    # Any of these tags means the page carries content other than an image.
    matches = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
        'font', 'br'])
    images = soup.findAll('img')
    if matches is None and len(images) == 1 and \
            images[0].get('alt', '')=='cover':
        img = images[0]
        # src uses '/' separators; rebuild it as a native path under base.
        img = os.path.join(base, *img['src'].split('/'))
        if os.path.exists(img):
            # Close the handle explicitly instead of relying on garbage
            # collection to release it.
            f = open(img, 'rb')
            try:
                return f.read()
            finally:
                f.close()
 def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    from calibre.ebooks.oeb.base import SVG_NS
    raw = open(path_to_html, 'rb').read()
@ -80,6 +93,11 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
                   os.path.dirname(path_to_html), log)
        except:
            pass
    if data is None:
        try:
            data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
        except:
            pass
    if data is None:
        renderer = render_html(path_to_html, width, height)
        data = getattr(renderer, 'data', None)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -12,6 +12,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
        run_plugins_on_preprocess, run_plugins_on_postprocess
 from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.date import parse_date
 from calibre import extract, walk
 DEBUG_README=u'''
@ -65,7 +66,7 @@ class Plumber(object):
    metadata_option_names = [
        'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
        'publisher', 'series', 'series_index', 'rating', 'isbn',
-        'tags', 'book_producer', 'language'
+        'tags', 'book_producer', 'language', 'pubdate', 'timestamp'
        ]
    def __init__(self, input, output, log, report_progress=DummyReporter(),
@ -461,6 +462,14 @@ OptionRecommendation(name='language',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the language.')),
 OptionRecommendation(name='pubdate',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the publication date.')),
 OptionRecommendation(name='timestamp',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the book timestamp (used by the date column in calibre).')),
 ]
        input_fmt = os.path.splitext(self.input)[1]
@ -619,6 +628,14 @@ OptionRecommendation(name='language',
                    except ValueError:
                        self.log.warn(_('Values of series index and rating must'
                        ' be numbers. Ignoring'), val)
                        continue
                elif x in ('timestamp', 'pubdate'):
                    try:
                        val = parse_date(val, assume_utc=x=='pubdate')
                    except:
                        self.log.exception(_('Failed to parse date/time') + ' ' +
                                unicode(val))
                        continue
                setattr(mi, x, val)
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@ -132,6 +132,8 @@ class EPUBInput(InputFormatPlugin):
        self.rationalize_cover(opf, log)
        self.optimize_opf_parsing = opf
        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -256,7 +256,20 @@ class EPUBOutput(OutputFormatPlugin):
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
-        from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename
+        from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote
        # ADE cries big wet tears when it encounters an invalid fragment
        # identifier in the NCX toc.
        frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
        for node in self.oeb.toc.iter():
            href = getattr(node, 'href', None)
            if hasattr(href, 'partition'):
                base, _, frag = href.partition('#')
                frag = urlunquote(frag)
                if frag and frag_pat.match(frag) is None:
                    self.log.warn(
                            'Removing invalid fragment identifier %r from TOC'%frag)
                    node.href = base
        for x in self.oeb.spine:
            root = x.data
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -111,7 +111,7 @@ class HTMLFile(object):
                raise IOError(msg)
            raise IgnoreFile(msg, err.errno)
-        self.is_binary = not bool(self.HTML_PAT.search(src[:4096]))
+        self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
        if not self.is_binary:
            if encoding is None:
                encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
@ -408,7 +408,10 @@ class HTMLInput(InputFormatPlugin):
            return link_
        if base and not os.path.isabs(link):
            link = os.path.join(base, link)
-        link = os.path.abspath(link)
+        try:
            link = os.path.abspath(link)
        except:
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
--- a/src/calibre/ebooks/lrf/pylrs/pylrs.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py
@ -50,6 +50,7 @@ from pylrf import (LrfWriter, LrfObject, LrfTag, LrfToc,
        STREAM_COMPRESSED, LrfTagStream, LrfStreamBase, IMAGE_TYPE_ENCODING,
        BINDING_DIRECTION_ENCODING, LINE_TYPE_ENCODING, LrfFileStream,
        STREAM_FORCE_COMPRESSED)
 from calibre.utils.date import isoformat
 DEFAULT_SOURCE_ENCODING = "cp1252"      # defualt is us-windows character set
 DEFAULT_GENREADING      = "fs"          # default is yes to both lrf and lrs
@ -852,7 +853,7 @@ class DocInfo(object):
        self.thumbnail = None
        self.language = "en"
        self.creator  = None
-        self.creationdate = date.today().isoformat()
+        self.creationdate = str(isoformat(date.today()))
        self.producer = "%s v%s"%(__appname__, __version__)
        self.numberofpages = "0"
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -10,9 +10,11 @@ import os, mimetypes, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse
 from calibre import relpath
 from calibre.utils.config import tweaks
 from calibre.utils.date import isoformat
 _author_pat = re.compile(',?\s+(and|with)\s+', re.IGNORECASE)
 def string_to_authors(raw):
    raw = raw.replace('&&', u'\uffff')
@ -27,6 +29,9 @@ def authors_to_string(authors):
        return ''
 def author_to_author_sort(author):
    method = tweaks['author_sort_copy_method']
    if method == 'copy' or (method == 'comma' and author.count(',') > 0):
        return author
    tokens = author.split()
    tokens = tokens[-1:] + tokens[:-1]
    if len(tokens) > 1:
@ -340,9 +345,9 @@ class MetaInformation(object):
        if self.rating is not None:
            fmt('Rating', self.rating)
        if self.timestamp is not None:
-            fmt('Timestamp', self.timestamp.isoformat(' '))
+            fmt('Timestamp', isoformat(self.timestamp))
        if self.pubdate is not None:
-            fmt('Published', self.pubdate.isoformat(' '))
+            fmt('Published', isoformat(self.pubdate))
        if self.rights is not None:
            fmt('Rights', unicode(self.rights))
        if self.lccn:
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@ -7,12 +7,11 @@ __docformat__ = 'restructuredtext en'
 Fetch metadata using Amazon AWS
 '''
 import sys, re
 from datetime import datetime
 from lxml import etree
 from dateutil import parser
 from calibre import browser
 from calibre.utils.date import parse_date, utcnow
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
@ -44,9 +43,8 @@ def get_social_metadata(title, authors, publisher, isbn):
        try:
            d = root.findtext('.//'+AWS('PublicationDate'))
            if d:
-                default = datetime.utcnow()
+                default = utcnow().replace(day=15)
-                default = datetime(default.year, default.month, 15)
+                d = parse_date(d[0].text, assume_utc=True, default=default)
                d = parser.parse(d[0].text, default=default)
                mi.pubdate = d
        except:
            pass
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@ -0,0 +1,65 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os
 from contextlib import closing
 from calibre.customize import FileTypePlugin
 def is_comic(list_of_names):
    '''
    Decide whether an archive listing looks like a comic (images only).

    The extension of each name is the text after its last ``'.'``, compared
    case-insensitively. A name without any ``'.'`` contributes the whole
    name as its "extension" and therefore makes the result False.

    :param list_of_names: Iterable of member names from the archive.
    :return: True when there is at least one name and every name has a
        ``jpg``, ``jpeg`` or ``png`` extension; False otherwise. Archives
        mixing these image types are accepted.
    '''
    comic_extensions = set(['jpg', 'jpeg', 'png'])
    extensions = set([x.rpartition('.')[-1].lower() for x in list_of_names])
    # Subset test replaces the Python-2-only ``iter(...).next()`` call and
    # no longer requires all pages to share one single extension.
    return bool(extensions) and extensions.issubset(comic_extensions)
 class ArchiveExtract(FileTypePlugin):
    # File type plugin run on import (on_import = True) for zip and rar
    # files. See run() for what it does with each archive.
    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = _('Extract common e-book formats from archives '
        '(zip/rar) files. Also try to autodetect if they are actually '
        'cbz/cbr files.')
    file_types = set(['zip', 'rar'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        '''
        Inspect ``archive`` and return the path of the file to import.

        * If every member with a ``'.'`` in its name is an image (see
          ``is_comic``), the archive is copied to a temporary file with a
          ``.cbr``/``.cbz`` extension and that path is returned.
        * If the archive holds exactly one such member and its extension is
          in the list below, that member is extracted to a temporary file
          and that path is returned.
        * Otherwise ``archive`` is returned unchanged.

        :param archive: Path to the zip or rar file; a name ending in
            ``.rar`` (case-insensitive) selects the rar code path.
        '''
        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.libunrar import extract_member, names
            fnames = names(archive)
        else:
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(archive, 'r')
            fnames = zf.namelist()
        try:
            # Ignore members without any '.' in their name.
            fnames = [x for x in fnames if '.' in x]
            if is_comic(fnames):
                ext = '.cbr' if is_rar else '.cbz'
                of = self.temporary_file('_archive_extract'+ext)
                # closing() ensures the temp file is closed even if the
                # copy fails part way through.
                with closing(of):
                    with open(archive, 'rb') as f:
                        of.write(f.read())
                return of.name
            if len(fnames) > 1 or not fnames:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
                    'mp3', 'pdb', 'azw', 'azw1'):
                return archive
            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    data = extract_member(archive, match=None, name=fname)[1]
                    of.write(data)
                else:
                    of.write(zf.read(fname))
            return of.name
        finally:
            # The zip handle was previously left open on every return path.
            if zf is not None:
                zf.close()
--- a/src/calibre/ebooks/metadata/cli.py
+++ b/src/calibre/ebooks/metadata/cli.py
@ -15,6 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
                    title_sort, MetaInformation
 from calibre.ebooks.lrf.meta import LRFMetaFile
 from calibre import prints
 from calibre.utils.date import parse_date
 USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
 _('''
@ -69,6 +70,8 @@ def config():
              help=_('Set the book producer.'))
    c.add_opt('language', ['-l', '--language'],
              help=_('Set the language.'))
    c.add_opt('pubdate', ['-d', '--date'],
              help=_('Set the published date.'))
    c.add_opt('get_cover', ['--get-cover'],
              help=_('Get the cover from the ebook and save it at as the '
@ -132,6 +135,8 @@ def do_set_metadata(opts, mi, stream, stream_type):
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from epub files'''
-import os
+import os, re
 from cStringIO import StringIO
 from contextlib import closing
@ -29,15 +29,15 @@ class Container(dict):
    def __init__(self, stream=None):
        if not stream: return
        soup = BeautifulStoneSoup(stream.read())
-        container = soup.find('container')
+        container = soup.find(name=re.compile(r'container$', re.I))
        if not container:
-            raise OCFException("<container/> element missing")
+            raise OCFException("<container> element missing")
        if container.get('version', None) != '1.0':
            raise EPubException("unsupported version of OCF")
-        rootfiles = container.find('rootfiles')
+        rootfiles = container.find(re.compile(r'rootfiles$', re.I))
        if not rootfiles:
            raise EPubException("<rootfiles/> element missing")
-        for rootfile in rootfiles.findAll('rootfile'):
+        for rootfile in rootfiles.findAll(re.compile(r'rootfile$', re.I)):
            try:
                self[rootfile['media-type']] = rootfile['full-path']
            except KeyError:
@ -69,7 +69,7 @@ class OCFReader(OCF):
        self.opf_path = self.container[OPF.MIMETYPE]
        try:
            with closing(self.open(self.opf_path)) as f:
-                self.opf = OPF(f, self.root)
+                self.opf = OPF(f, self.root, populate_spine=False)
        except KeyError:
            raise EPubException("missing OPF package file")
@ -101,10 +101,9 @@ class OCFDirReader(OCFReader):
 def get_cover(opf, opf_path, stream):
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log
-    spine = list(opf.spine_items())
+    cpage = opf.first_spine_item()
-    if not spine:
+    if not cpage:
        return
    cpage = spine[0]
    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            stream.seek(0)
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@ -6,14 +6,13 @@ __docformat__ = 'restructuredtext en'
 import sys, textwrap
 from urllib import urlencode
 from functools import partial
 from datetime import datetime
 from lxml import etree
 from dateutil import parser
 from calibre import browser, preferred_encoding
 from calibre.ebooks.metadata import MetaInformation
 from calibre.utils.config import OptionParser
 from calibre.utils.date import parse_date, utcnow
 NAMESPACES = {
              'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
@ -156,9 +155,8 @@ class ResultList(list):
        try:
            d = date(entry)
            if d:
-                default = datetime.utcnow()
+                default = utcnow().replace(day=15)
-                default = datetime(default.year, default.month, 15)
+                d = parse_date(d[0].text, assume_utc=True, default=default)
                d = parser.parse(d[0].text, default=default)
            else:
                d = None
        except:
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -65,6 +65,10 @@ def _metadata_from_formats(formats):
    return mi
 def is_recipe(filename):
    # True when the name begins with 'calibre' and the part before the last
    # '.' ends with '_recipe_out'. A name with no '.' has an empty stem and
    # therefore never matches.
    stem = filename.rpartition('.')[0]
    has_prefix = filename.startswith('calibre')
    return has_prefix and stem.endswith('_recipe_out')
 def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
    if stream_type: stream_type = stream_type.lower()
    if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
@ -84,11 +88,10 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
        return opf
    mi = MetaInformation(None, None)
    if prefs['read_file_metadata']:
        mi = get_file_type_metadata(stream, stream_type)
    name = os.path.basename(getattr(stream, 'name', ''))
    base = metadata_from_filename(name)
    if is_recipe(name) or prefs['read_file_metadata']:
        mi = get_file_type_metadata(stream, stream_type)
    if base.title == os.path.splitext(name)[0] and base.authors is None:
        # Assume that there was no metadata in the file and the user set pattern
        # to match meta info from the file name did not match.
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@ -11,13 +11,11 @@ __docformat__ = 'restructuredtext en'
 from struct import pack, unpack
 from cStringIO import StringIO
 from datetime import datetime
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
 from calibre.ebooks.mobi.langcodes import iana2mobi
-
+from calibre.utils.date import now as nowf
 import struct
 class StreamSlicer(object):
@ -105,11 +103,12 @@ class MetadataUpdater(object):
        have_exth = self.have_exth = (flags & 0x40) != 0
        self.cover_record = self.thumbnail_record = None
        self.timestamp = None
        self.pdbrecords = self.get_pdbrecords()
        self.original_exth_records = {}
        if not have_exth:
            self.create_exth()
-
+            self.have_exth = True
        # Fetch timestamp, cover_record, thumbnail_record
        self.fetchEXTHFields()
@ -131,14 +130,18 @@ class MetadataUpdater(object):
            content = exth[pos + 8: pos + size]
            pos += size
            self.original_exth_records[id] = content
            if id == 106:
                self.timestamp = content
            elif id == 201:
-                rindex, = self.cover_rindex, = unpack('>I', content)
+                rindex, = self.cover_rindex, = unpack('>i', content)
-                self.cover_record = self.record(rindex + image_base)
+                if rindex > 0 :
                    self.cover_record = self.record(rindex + image_base)
            elif id == 202:
-                rindex, = self.thumbnail_rindex, = unpack('>I', content)
+                rindex, = self.thumbnail_rindex, = unpack('>i', content)
-                self.thumbnail_record = self.record(rindex + image_base)
+                if rindex > 0 :
                    self.thumbnail_record = self.record(rindex + image_base)
    def patch(self, off, new_record0):
        # Save the current size of each record
@ -181,14 +184,14 @@ class MetadataUpdater(object):
        off = self.pdbrecords[section][0]
        self.patch(off, new)
-    def create_exth(self, exth=None):
+    def create_exth(self, new_title=None, exth=None):
        # Add an EXTH block to record 0, rewrite the stream
        # self.hexdump(self.record0)
-        # Fetch the title
+        # Fetch the existing title
-        title_offset, = struct.unpack('>L', self.record0[0x54:0x58])
+        title_offset, = unpack('>L', self.record0[0x54:0x58])
-        title_length, = struct.unpack('>L', self.record0[0x58:0x5c])
+        title_length, = unpack('>L', self.record0[0x58:0x5c])
-        title_in_file, = struct.unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
+        title_in_file, = unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length])
        # Adjust length to accommodate PrimaryINDX if necessary
        mobi_header_length, = unpack('>L', self.record0[0x14:0x18])
@ -207,14 +210,21 @@ class MetadataUpdater(object):
            exth = ['EXTH', pack('>II', 12, 0), pad]
            exth = ''.join(exth)
-        # Update title_offset
+        # Update title_offset, title_len if new_title
        self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth))
        if new_title:
            self.record0[0x58:0x5c] = pack('>L', len(new_title))
        # Create an updated Record0
        new_record0 = StringIO()
        new_record0.write(self.record0[:0x10 + mobi_header_length])
        new_record0.write(exth)
-        new_record0.write(title_in_file)
+        if new_title:
            #new_record0.write(new_title.encode(self.codec, 'replace'))
            new_title = (new_title or _('Unknown')).encode(self.codec, 'replace')
            new_record0.write(new_title)
        else:
            new_record0.write(title_in_file)
        # Pad to a 4-byte boundary
        trail = len(new_record0.getvalue()) % 4
@ -244,7 +254,7 @@ class MetadataUpdater(object):
    def get_pdbrecords(self):
        pdbrecords = []
        for i in xrange(self.nrecs):
-            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
+            offset, a1,a2,a3,a4 = unpack('>LBBBB', self.data[78+i*8:78+i*8+8])
            flags, val = a1, a2<<16|a3<<8|a4
            pdbrecords.append( [offset, flags, val] )
        return pdbrecords
@ -275,6 +285,10 @@ class MetadataUpdater(object):
        return StreamSlicer(self.stream, start, stop)
    def update(self, mi):
        def pop_exth_record(exth_id):
            if exth_id in self.original_exth_records:
                self.original_exth_records.pop(exth_id)
        if self.type != "BOOKMOBI":
                raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
                                "\tThis is a '%s' file of type '%s'" % (self.type[0:4], self.type[4:8]))
@ -289,35 +303,53 @@ class MetadataUpdater(object):
        if mi.author_sort and pas:
            authors = mi.author_sort
            recs.append((100, authors.encode(self.codec, 'replace')))
            pop_exth_record(100)
        elif mi.authors:
            authors = '; '.join(mi.authors)
            recs.append((100, authors.encode(self.codec, 'replace')))
            pop_exth_record(100)
        if mi.publisher:
            recs.append((101, mi.publisher.encode(self.codec, 'replace')))
            pop_exth_record(101)
        if mi.comments:
            recs.append((103, mi.comments.encode(self.codec, 'replace')))
            pop_exth_record(103)
        if mi.isbn:
            recs.append((104, mi.isbn.encode(self.codec, 'replace')))
            pop_exth_record(104)
        if mi.tags:
            subjects = '; '.join(mi.tags)
            recs.append((105, subjects.encode(self.codec, 'replace')))
            pop_exth_record(105)
        if mi.pubdate:
            recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
            pop_exth_record(106)
        elif mi.timestamp:
            recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
            pop_exth_record(106)
        elif self.timestamp:
            recs.append((106, self.timestamp))
            pop_exth_record(106)
        else:
-            recs.append((106, str(datetime.now()).encode(self.codec, 'replace')))
+            recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
            pop_exth_record(106)
        if self.cover_record is not None:
            recs.append((201, pack('>I', self.cover_rindex)))
            recs.append((203, pack('>I', 0)))
            pop_exth_record(201)
            pop_exth_record(203)
        if self.thumbnail_record is not None:
            recs.append((202, pack('>I', self.thumbnail_rindex)))
            pop_exth_record(202)
        if getattr(self, 'encryption_type', -1) != 0:
            raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
        # Restore any original EXTH fields that weren't modified/updated
        for id in sorted(self.original_exth_records):
            recs.append((id, self.original_exth_records[id]))
        recs = sorted(recs, key=lambda x:(x[0],x[0]))
        exth = StringIO()
        for code, data in recs:
            exth.write(pack('>II', code, len(data) + 8))
@ -332,7 +364,7 @@ class MetadataUpdater(object):
            raise MobiError('No existing EXTH record. Cannot update metadata.')
        self.record0[92:96] = iana2mobi(mi.language)
-        self.create_exth(exth)
+        self.create_exth(exth=exth, new_title=mi.title)
        # Fetch updated timestamp, cover_record, thumbnail_record
        self.fetchEXTHFields()
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -12,12 +12,12 @@ from urllib import unquote
 from urlparse import urlparse
 from lxml import etree
 from dateutil import parser
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.constants import __appname__, __version__, filesystem_encoding
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 from calibre.utils.date import parse_date, isoformat
 class Resource(object):
@ -272,6 +272,10 @@ class Spine(ResourceCollection):
            self.id = idfunc(self.path)
            self.idref = None
        def __repr__(self):
            # Debugging representation showing path, id and is_linear.
            fields = (self.path, self.id, self.is_linear)
            return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % fields
    @staticmethod
    def from_opf_spine_element(itemrefs, manifest):
        s = Spine(manifest)
@ -280,7 +284,7 @@ class Spine(ResourceCollection):
            if idref is not None:
                path = s.manifest.path_for_id(idref)
                if path:
-                    r = Spine.Item(s.manifest.id_for_path, path, is_path=True)
+                    r = Spine.Item(lambda x:idref, path, is_path=True)
                    r.is_linear = itemref.get('linear', 'yes') == 'yes'
                    r.idref = idref
                    s.append(r)
@ -441,6 +445,8 @@ class OPF(object):
    guide_path      = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
    title           = MetadataField('title', formatter=lambda x: re.sub(r'\s+', ' ', x))
    title_sort      = MetadataField('title_sort', formatter=lambda x:
                        re.sub(r'\s+', ' ', x), is_dc=False)
    publisher       = MetadataField('publisher')
    language        = MetadataField('language')
    comments        = MetadataField('description')
@ -449,12 +455,14 @@ class OPF(object):
    series          = MetadataField('series', is_dc=False)
    series_index    = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
    rating          = MetadataField('rating', is_dc=False, formatter=int)
-    pubdate         = MetadataField('date', formatter=parser.parse)
+    pubdate         = MetadataField('date', formatter=parse_date)
    publication_type = MetadataField('publication_type', is_dc=False)
-    timestamp       = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
+    timestamp       = MetadataField('timestamp', is_dc=False,
                                    formatter=parse_date)
-    def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True):
+    def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
            populate_spine=True):
        if not hasattr(stream, 'read'):
            stream = open(stream, 'rb')
        raw = stream.read()
@ -477,7 +485,7 @@ class OPF(object):
            self.manifest = Manifest.from_opf_manifest_element(m, basedir)
        self.spine = None
        s = self.spine_path(self.root)
-        if s:
+        if populate_spine and s:
            self.spine = Spine.from_opf_spine_element(s, self.manifest)
        self.guide = None
        guide = self.guide_path(self.root)
@ -584,6 +592,15 @@ class OPF(object):
                if x.get('id', None) == idref:
                    yield x.get('href', '')
    def first_spine_item(self):
        '''
        Return the ``href`` of the manifest entry whose ``id`` equals the
        ``idref`` of the first spine entry.

        Returns None when the spine is empty or no manifest entry matches.
        Assumes ``iterspine()`` returns an indexable sequence -- TODO confirm.
        '''
        spine_refs = self.iterspine()
        if not spine_refs:
            return None
        wanted = spine_refs[0].get('idref', '')
        for entry in self.itermanifest():
            if entry.get('id', None) != wanted:
                continue
            return entry.get('href', None)
    def create_spine_item(self, idref):
        ans = etree.Element('{%s}itemref'%self.NAMESPACES['opf'], idref=idref)
        ans.tail = '\n\t\t'
@ -675,29 +692,6 @@ class OPF(object):
        return property(fget=fget, fset=fset)
    @dynamic_property
    def title_sort(self):
        def fget(self):
            matches = self.title_path(self.metadata)
            if matches:
                for match in matches:
                    ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
                    if not ans:
                        ans = match.get('file-as', None)
                    if ans:
                        return ans
        def fset(self, val):
            matches = self.title_path(self.metadata)
            if matches:
                for key in matches[0].attrib:
                    if key.endswith('file-as'):
                        matches[0].attrib.pop(key)
                matches[0].set('file-as', unicode(val))
        return property(fget=fget, fset=fset)
    @dynamic_property
    def tags(self):
@ -869,7 +863,8 @@ class OPF(object):
    def smart_update(self, mi):
        for attr in ('title', 'authors', 'author_sort', 'title_sort',
                     'publisher', 'series', 'series_index', 'rating',
-                     'isbn', 'language', 'tags', 'category', 'comments'):
+                     'isbn', 'language', 'tags', 'category', 'comments',
                     'pubdate'):
            val = getattr(mi, attr, None)
            if val is not None and val != [] and val != (None, None):
                setattr(self, attr, val)
@ -1041,12 +1036,12 @@ def metadata_to_opf(mi, as_string=True):
            elem.text = text.strip()
        metadata.append(elem)
-    factory(DC('title'), mi.title, mi.title_sort)
+    factory(DC('title'), mi.title)
    for au in mi.authors:
        factory(DC('creator'), au, mi.author_sort, 'aut')
    factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
    if hasattr(mi.pubdate, 'isoformat'):
-        factory(DC('date'), mi.pubdate.isoformat())
+        factory(DC('date'), isoformat(mi.pubdate))
    factory(DC('language'), mi.language)
    if mi.category:
        factory(DC('type'), mi.category)
@ -1069,9 +1064,11 @@ def metadata_to_opf(mi, as_string=True):
    if mi.rating is not None:
        meta('rating', str(mi.rating))
    if hasattr(mi.timestamp, 'isoformat'):
-        meta('timestamp', mi.timestamp.isoformat())
+        meta('timestamp', isoformat(mi.timestamp))
    if mi.publication_type:
        meta('publication_type', mi.publication_type)
    if mi.title_sort:
        meta('title_sort', mi.title_sort)
    metadata[-1].tail = '\n' +(' '*4)
@ -1088,12 +1085,12 @@ def metadata_to_opf(mi, as_string=True):
 def test_m2o():
-    from datetime import datetime
+    from calibre.utils.date import now as nowf
    from cStringIO import StringIO
    mi = MetaInformation('test & title', ['a"1', "a'2"])
    mi.title_sort = 'a\'"b'
    mi.author_sort = 'author sort'
-    mi.pubdate = datetime.now()
+    mi.pubdate = nowf()
    mi.language = 'en'
    mi.category = 'test'
    mi.comments = 'what a fun book\n\n'
@ -1103,7 +1100,7 @@ def test_m2o():
    mi.series = 's"c\'l&<>'
    mi.series_index = 3.34
    mi.rating = 3
-    mi.timestamp = datetime.now()
+    mi.timestamp = nowf()
    mi.publication_type = 'ooooo'
    mi.rights = 'yes'
    mi.cover = 'asd.jpg'
--- a/src/calibre/ebooks/metadata/rar.py
+++ b/src/calibre/ebooks/metadata/rar.py
@ -13,6 +13,9 @@ from calibre.ptempfile import PersistentTemporaryFile
 from calibre.libunrar import extract_member, names
 def get_metadata(stream):
    from calibre.ebooks.metadata.archive import is_comic
    from calibre.ebooks.metadata.meta import get_metadata
    path = getattr(stream, 'name', False)
    if not path:
        pt = PersistentTemporaryFile('_rar-meta.rar')
@ -21,6 +24,8 @@ def get_metadata(stream):
        path = pt.name
    path = os.path.abspath(path)
    file_names = list(names(path))
    if is_comic(file_names):
        return get_metadata(stream, 'cbr')
    for f in file_names:
        stream_type = os.path.splitext(f)[1].lower()
        if stream_type:
@ -29,8 +34,7 @@ def get_metadata(stream):
                               'rb', 'imp', 'pdf', 'lrf'):
                data = extract_member(path, match=None, name=f)[1]
                stream = StringIO(data)
                from calibre.ebooks.metadata.meta import get_metadata
                return get_metadata(stream, stream_type)
-    raise ValueError('No ebook found in RAR archive') 
+    raise ValueError('No ebook found in RAR archive')
-        
+
--- a/src/calibre/ebooks/metadata/zip.py
+++ b/src/calibre/ebooks/metadata/zip.py
@ -8,15 +8,21 @@ from cStringIO import StringIO
 def get_metadata(stream):
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.archive import is_comic
    stream_type = None
    zf = ZipFile(stream, 'r')
-    for f in zf.namelist():
+    names = zf.namelist()
    if is_comic(names):
        # Is probably a comic
        return get_metadata(stream, 'cbz')
    for f in names:
        stream_type = os.path.splitext(f)[1].lower()
        if stream_type:
            stream_type = stream_type[1:]
            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                               'rb', 'imp', 'pdf', 'lrf'):
                from calibre.ebooks.metadata.meta import get_metadata
                stream = StringIO(zf.read(f))
                return get_metadata(stream, stream_type)
    raise ValueError('No ebook found in ZIP archive')
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -4,13 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''
 import datetime
 import functools
 import os
 import re
 import struct
 import textwrap
 import cStringIO
 try:
@ -23,6 +21,7 @@ from lxml import html, etree
 from calibre import entity_to_unicode, CurrentDir
 from calibre.utils.filenames import ascii_filename
 from calibre.utils.date import parse_date
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks import DRMError
 from calibre.ebooks.chardet import ENCODING_PATS
@ -68,7 +67,10 @@ class EXTHHeader(object):
                pass
            elif id == 503: # Long title
                if not title or title == _('Unknown'):
-                    title = content
+                    try:
                        title = content.decode(codec)
                    except:
                        pass
            #else:
            #    print 'unknown record', id, repr(content)
        if title:
@ -96,8 +98,7 @@ class EXTHHeader(object):
            self.mi.tags = list(set(self.mi.tags))
        elif id == 106:
            try:
-                self.mi.publish_date = datetime.datetime.strptime(
+                self.mi.pubdate = parse_date(content, as_utc=False)
                    content, '%Y-%m-%d', ).date()
            except:
                pass
        elif id == 108:
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -310,6 +310,7 @@ class Serializer(object):
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        text = text.replace(u'\u00AD', '') # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
        self.buffer.write(encode(text))
@ -610,12 +611,21 @@ class MobiWriter(object):
            if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
                if offset != previousOffset + previousLength :
                    self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
-                    self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
+                    self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
                        (i-1, entries[i-1].title, previousOffset, previousLength) )
                    self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
                        (i, child.title, offset, previousOffset + previousLength) )
-                    self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
+                    # self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
-                    self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
+                    # self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
                    # Dump the offending entry
                    self._oeb.log.info("...")
                    for z in range(i-6 if i-6 > 0 else 0, i+6 if i+6 < len(entries) else len(entries)):
                        if z == i:
                            self._oeb.log.warning("child %03d: %s" % (z, entries[z]))
                        else:
                            self._oeb.log.info("child %03d: %s" % (z, entries[z]))
                    self._oeb.log.info("...")
                    self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
                    # Zero out self._HTMLRecords, return False
                    self._HTMLRecords = []
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
 from urlparse import urljoin
 from lxml import etree, html
 from cssutils import CSSParser
 import calibre
-from cssutils import CSSParser
+from calibre.constants import filesystem_encoding
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -434,10 +435,18 @@ class DirContainer(object):
    def namelist(self):
        names = []
-        for root, dirs, files in os.walk(self.rootdir):
+        base = self.rootdir
        if isinstance(base, unicode):
            base = base.encode(filesystem_encoding)
        for root, dirs, files in os.walk(base):
            for fname in files:
                fname = os.path.join(root, fname)
                fname = fname.replace('\\', '/')
                if not isinstance(fname, unicode):
                    try:
                        fname = fname.decode(filesystem_encoding)
                    except:
                        continue
                names.append(fname)
        return names
@ -842,8 +851,10 @@ class Manifest(object):
                    self.oeb.log.warn('File %r appears to be a HTML fragment'%self.href)
                    nroot = etree.fromstring('<html><body/></html>')
                    parent = nroot[0]
-                for child in list(data):
+                for child in list(data.iter()):
-                    child.getparent().remove(child)
+                    oparent = child.getparent()
                    if oparent is not None:
                        oparent.remove(child)
                    parent.append(child)
                data = nroot
@ -1567,14 +1578,17 @@ class TOC(object):
            parent = etree.Element(NCX('navMap'))
        for node in self.nodes:
            id = node.id or unicode(uuid.uuid4())
-            attrib = {'id': id, 'playOrder': str(node.play_order)}
+            po = node.play_order
            if po == 0:
                po = 1
            attrib = {'id': id, 'playOrder': str(po)}
            if node.klass:
                attrib['class'] = node.klass
            point = element(parent, NCX('navPoint'), attrib=attrib)
            label = etree.SubElement(point, NCX('navLabel'))
            title = node.title
            if title:
-                title = re.sub(r'\s', ' ', title)
+                title = re.sub(r'\s+', ' ', title)
            element(label, NCX('text')).text = title
            element(point, NCX('content'), src=urlunquote(node.href))
            node.to_ncx(point)
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -120,7 +120,10 @@ class EbookIterator(object):
        bad_map = {}
        font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
        for csspath in css_files:
-            css = open(csspath, 'rb').read().decode('utf-8', 'replace')
+            try:
                css = open(csspath, 'rb').read().decode('utf-8', 'replace')
            except:
                continue
            for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
                block  = match.group(1)
                family = font_family_pat.search(block)
@ -181,8 +184,9 @@ class EbookIterator(object):
        if hasattr(self.pathtoopf, 'manifest'):
            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
-
+        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
-        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
+        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@ -7,7 +7,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os
-from datetime import datetime
+from calibre.utils.date import isoformat, now
 def meta_info_to_oeb_metadata(mi, m, log):
    from calibre.ebooks.oeb.base import OPF
@ -60,10 +60,10 @@ def meta_info_to_oeb_metadata(mi, m, log):
            m.add('subject', t)
    if mi.pubdate is not None:
        m.clear('date')
-        m.add('date', mi.pubdate.isoformat())
+        m.add('date', isoformat(mi.pubdate))
    if mi.timestamp is not None:
        m.clear('timestamp')
-        m.add('timestamp', mi.timestamp.isoformat())
+        m.add('timestamp', isoformat(mi.timestamp))
    if mi.rights is not None:
        m.clear('rights')
        m.add('rights', mi.rights)
@ -71,7 +71,7 @@ def meta_info_to_oeb_metadata(mi, m, log):
        m.clear('publication_type')
        m.add('publication_type', mi.publication_type)
    if not m.timestamp:
-        m.add('timestamp', datetime.now().isoformat())
+        m.add('timestamp', isoformat(now()))
 class MergeMetadata(object):
--- a/src/calibre/ebooks/oeb/transforms/rescale.py
+++ b/src/calibre/ebooks/oeb/transforms/rescale.py
@ -35,7 +35,10 @@ class RescaleImages(object):
                if not raw: continue
                if qt:
                    img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
-                    if not img.loadFromData(raw): continue
+                    try:
                        if not img.loadFromData(raw): continue
                    except:
                        continue
                    width, height = img.width(), img.height()
                else:
                    f = cStringIO.StringIO(raw)
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@ -42,9 +42,9 @@ class Writer(FormatWriter):
        pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
        text, text_sizes = self._text(pml)
-        chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4)="(?P<text>.+?)"', pml)
+        chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
-        chapter_index += self.index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
+        chapter_index += self._index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
-        chapter_index += self.index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
+        chapter_index += self._index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
        link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
        images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
        metadata = [self._metadata(metadata)]
--- a/src/calibre/ebooks/pdf/main.cpp
+++ b/src/calibre/ebooks/pdf/main.cpp
@ -169,6 +169,8 @@ int main(int argc, char **argv) {
    char *memblock;
    ifstream::pos_type size;
    int ret = 0;
    map<string,string> info;
    Reflow *reflow = NULL;
    if (argc != 2)  {
@ -189,9 +191,13 @@ int main(int argc, char **argv) {
    }
    try {
-        Reflow reflow(memblock, size);
+        reflow = new Reflow(memblock, size);
-        reflow.render();
+        info = reflow->get_info();
-        vector<char> *data = reflow.render_first_page();
+        for (map<string,string>::const_iterator it = info.begin() ; it != info.end(); it++ ) {
            cout << (*it).first << " : " << (*it).second << endl;
        }
        //reflow->render();
        vector<char> *data = reflow->render_first_page();
        ofstream file("cover.png", ios::binary);
        file.write(&((*data)[0]), data->size());
        delete data;
@ -200,7 +206,7 @@ int main(int argc, char **argv) {
        cerr << e.what() << endl;
        ret = 1;
    }
-
+    delete reflow;
    delete[] memblock;
    return ret;
 }
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@ -6,7 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import sys
+import sys, os
 from lxml import etree
@ -47,6 +47,10 @@ class Image(Element):
        return '<img src="%s" width="%dpx" height="%dpx"/>' % \
                (self.src, int(self.width), int(self.height))
    def dump(self, f):
        '''
        Debug helper: write this image's HTML markup (as produced by
        to_html()) to the file-like object `f`, followed by a newline.
        '''
        markup = self.to_html()
        for piece in (markup, '\n'):
            f.write(piece)
 class Text(Element):
@ -91,6 +95,10 @@ class Text(Element):
    def to_html(self):
        return self.raw
    def dump(self, f):
        '''
        Debug helper: write the UTF-8 encoded markup of this text element
        (as produced by to_html()) to the file-like object `f`, followed by
        a newline.
        '''
        encoded = self.to_html().encode('utf-8')
        for piece in (encoded, '\n'):
            f.write(piece)
 class FontSizeStats(dict):
    def __init__(self, stats):
@ -143,6 +151,14 @@ class Column(object):
    def add(self, elem):
        '''
        Append `elem` to this column and refresh the column's derived state
        via _post_add(). Elements already in the column are ignored.
        '''
        if elem not in self.elements:
            self.elements.append(elem)
            self._post_add()
    def prepend(self, elem):
        '''
        Insert `elem` at the front of this column and refresh the column's
        derived state via _post_add(). Elements already in the column are
        ignored.
        '''
        if elem not in self.elements:
            self.elements[0:0] = [elem]
            self._post_add()
    def _post_add(self):
        self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
        self.top = self.elements[0].top
        self.bottom = self.elements[-1].bottom
@ -183,6 +199,11 @@ class Column(object):
            return None
        return self.elements[idx-1]
    def dump(self, f, num):
        '''
        Debug helper: write a header line naming this column by its index
        `num` to the file-like object `f`, then ask every element in the
        column to dump itself to `f`.
        '''
        header = '******** Column %d\n\n' % num
        f.write(header)
        for member in self.elements:
            member.dump(f)
 class Box(list):
@ -262,7 +283,6 @@ class Region(object):
            max_lines = max(max_lines, len(c))
        return max_lines
    @property
    def is_small(self):
        '''True when this region's line_count is less than three.'''
        return self.line_count < 3
@ -283,7 +303,6 @@ class Region(object):
                mc = self.columns[0]
            return mc
        print
        for c in singleton.columns:
            for elem in c:
                col = most_suitable_column(elem)
@ -304,6 +323,51 @@ class Region(object):
        for x in self.columns:
            yield x
    def absorb_regions(self, regions, at):
        '''
        Absorb every region in `regions`, in iteration order, by delegating
        to absorb_region(). `at` is passed through unchanged.
        '''
        for donor in regions:
            self.absorb_region(donor, at)
    def absorb_region(self, region, at):
        '''
        Merge the elements of `region` into the columns of this region.

        `at` selects how elements are inserted: 'bottom' uses the
        destination column's add(), any other value uses prepend() and
        walks the source elements in reverse order.

        If `region` has no more columns than this region, columns are
        paired by position. Otherwise each source column is mapped to the
        destination column it overlaps most horizontally (falling back to
        column 0 when nothing overlaps) and elements are transferred one
        line at a time across all source columns.
        '''
        at_bottom = at == 'bottom'

        def place(dest, elem):
            # Look the method up per element, as the inline code did
            if at_bottom:
                dest.add(elem)
            else:
                dest.prepend(elem)

        if len(region.columns) > len(self.columns):
            # More source columns than destinations: pick, for every source
            # column, the destination with the widest horizontal overlap
            targets = []
            for col in region.columns:
                best_width, best_index = 0, 0
                for j, dcol in enumerate(self.columns):
                    sint = Interval(col.left, col.right)
                    dint = Interval(dcol.left, dcol.right)
                    width = sint.intersection(dint).width
                    if width > best_width:
                        best_width, best_index = width, j
                targets.append(best_index)

            depth = max(map(len, region.columns))
            if at_bottom:
                order = range(depth)
            else:
                order = range(depth-1, -1, -1)
            for row in order:
                for j, src in enumerate(region.columns):
                    dest = self.columns[targets[j]]
                    if row < len(src):
                        place(dest, src.elements[row])
            return

        # Source fits: pair columns by position
        for i in range(len(region.columns)):
            src, dest = region.columns[i], self.columns[i]
            elems = src if at_bottom else reversed(list(iter(src)))
            for elem in elems:
                place(dest, elem)
    def dump(self, f):
        '''
        Debug helper: write a three-line banner stating how many columns
        this region has to the file-like object `f`, then dump every column
        together with its index.
        '''
        column_count = len(self.columns)
        banner = (
            '############################################################\n',
            '########## Region (%d columns) ###############\n' % column_count,
            '############################################################\n\n',
        )
        for text in banner:
            f.write(text)
        for position, column in enumerate(self.columns):
            column.dump(f, position)
    def linearize(self):
        self.elements = []
        for x in self.columns:
@ -376,7 +440,8 @@ class Page(object):
                self.font_size_stats[t.font_size] = 0
            self.font_size_stats[t.font_size] += len(t.text_as_string)
            self.average_text_height += t.height
-        self.average_text_height /= len(self.texts)
+        if len(self.texts):
            self.average_text_height /= len(self.texts)
        self.font_size_stats = FontSizeStats(self.font_size_stats)
@ -431,31 +496,78 @@ class Page(object):
        if not current_region.is_empty:
            self.regions.append(current_region)
        if self.opts.verbose > 2:
            self.debug_dir = 'page-%d'%self.number
            os.mkdir(self.debug_dir)
            self.dump_regions('pre-coalesce')
        self.coalesce_regions()
        self.dump_regions('post-coalesce')
    def dump_regions(self, fname):
        '''
        Debug helper: write a textual dump of all regions on this page to
        ``regions-<fname>.txt`` inside the page's debug directory.

        The debug directory (``self.debug_dir``) is only created when
        verbose debugging is enabled, yet this method is also called
        unconditionally after region coalescing. When no debug directory
        has been set up, this method therefore does nothing instead of
        failing with an AttributeError.

        :param fname: Short label used to build the output file name.
        '''
        debug_dir = getattr(self, 'debug_dir', None)
        if debug_dir is None:
            # Debug output was never requested for this page
            return
        fname = 'regions-'+fname+'.txt'
        with open(os.path.join(debug_dir, fname), 'wb') as f:
            f.write('Page #%d\n\n'%self.number)
            for region in self.regions:
                region.dump(f)
    def coalesce_regions(self):
        # find contiguous sets of small regions
        # absorb into a neighboring region (prefer the one with number of cols
        # closer to the avg number of cols in the set, if equal use larger
        # region)
        # merge contiguous regions that can contain each other
        absorbed = set([])
        found = True
        absorbed = set([])
        processed = set([])
        while found:
            found = False
            for i, region in enumerate(self.regions):
-                if region.is_small:
+                if region in absorbed:
                    continue
                if region.is_small and region not in processed:
                    found = True
-                    regions = []
+                    processed.add(region)
                    regions = [region]
                    end = i+1
                    for j in range(i+1, len(self.regions)):
                        end = j
                        if self.regions[j].is_small:
                            regions.append(self.regions[j])
                        else:
                            break
-                    prev = None if i == 0 else i-1
+                    prev_region = None if i == 0 else i-1
-                    next = j if self.regions[j] not in regions else None
+                    next_region = end if end < len(self.regions) and self.regions[end] not in regions else None
-
+                    absorb_at = 'bottom'
-
+                    if prev_region is None and next_region is not None:
                        absorb_into = next_region
                        absorb_at = 'top'
                    elif next_region is None and prev_region is not None:
                        absorb_into = prev_region
                    elif prev_region is None and next_region is None:
                        if len(regions) > 1:
                            absorb_into = i
                            regions = regions[1:]
                        else:
                            absorb_into = None
                    else:
                        absorb_into = prev_region
                        if self.regions[next_region].line_count >= \
                                self.regions[prev_region].line_count:
                            avg_column_count = sum([len(r.columns) for r in
                                regions])/float(len(regions))
                            if self.regions[next_region].line_count > \
                                    self.regions[prev_region].line_count \
                               or abs(avg_column_count -
                                       len(self.regions[prev_region].columns)) \
                               > abs(avg_column_count -
                                       len(self.regions[next_region].columns)):
                                   absorb_into = next_region
                                   absorb_at = 'top'
                    if absorb_into is not None:
                        self.regions[absorb_into].absorb_regions(regions, absorb_at)
                        absorbed.update(regions)
        for region in absorbed:
            self.regions.remove(region)
    def sort_into_columns(self, elem, neighbors):
        neighbors.add(elem)
@ -575,8 +687,9 @@ class PDFDocument(object):
        for elem in self.elements:
            html.extend(elem.to_html())
        html += ['</body>', '</html>']
        raw = (u'\n'.join(html)).replace('</strong><strong>', '')
        with open('index.html', 'wb') as f:
-            f.write((u'\n'.join(html)).encode('utf-8'))
+            f.write(raw.encode('utf-8'))
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -182,10 +182,10 @@ class PML_HTMLizer(object):
        return pml
    def strip_pml(self, pml):
-        pml = re.sub(r'\\C\d=".+*"', '', pml)
+        pml = re.sub(r'\\C\d=".*"', '', pml)
-        pml = re.sub(r'\\Fn=".+*"', '', pml)
+        pml = re.sub(r'\\Fn=".*"', '', pml)
-        pml = re.sub(r'\\Sd=".+*"', '', pml)
+        pml = re.sub(r'\\Sd=".*"', '', pml)
-        pml = re.sub(r'\\.=".+*"', '', pml)
+        pml = re.sub(r'\\.=".*"', '', pml)
        pml = re.sub(r'\\X\d', '', pml)
        pml = re.sub(r'\\S[pbd]', '', pml)
        pml = re.sub(r'\\Fn', '', pml)
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@ -27,7 +27,7 @@ from calibre.ebooks.rtf2xml import headings_to_sections, \
    paragraph_def, convert_to_tags, output, copy, \
    list_numbers, info, pict, table_info, fonts, paragraphs, \
    body_styles, preamble_rest, group_styles, \
-    inline, correct_unicode
+    inline
 from calibre.ebooks.rtf2xml.old_rtf import OldRtf
 """
@ -256,15 +256,6 @@ class ParseRtf:
           )
        pict_obj.process_pict()
        self.__bracket_match('pict_data_info')
        correct_uni_obj = correct_unicode.CorrectUnicode(
            in_file = self.__temp_file,
            bug_handler = RtfInvalidCodeException,
            copy = self.__copy,
            run_level = self.__run_level,
            exception_handler = InvalidRtfException,
           )
        correct_uni_obj.correct_unicode()
        self.__bracket_match('correct_unicode_info')
        combine_obj = combine_borders.CombineBorders(
            in_file = self.__temp_file,
            bug_handler = RtfInvalidCodeException,
--- a/src/calibre/ebooks/rtf2xml/correct_unicode.py
+++ b/src/calibre/ebooks/rtf2xml/correct_unicode.py
@ -1,94 +0,0 @@
 #########################################################################
 #                                                                       #
 #                                                                       #
 #   copyright 2002 Paul Henry Tremblay                                  #
 #                                                                       #
 #   This program is distributed in the hope that it will be useful,     #
 #   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
 #   General Public License for more details.                            #
 #                                                                       #
 #   You should have received a copy of the GNU General Public License   #
 #   along with this program; if not, write to the Free Software         #
 #   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
 #   02111-1307 USA                                                      #
 #                                                                       #
 #                                                                       #
 #########################################################################
 import os, re,  tempfile
 from calibre.ebooks.rtf2xml import copy
 class CorrectUnicode:
    """
    corrects sequences such as \u201c\'F0\'BE
    Where \'F0\'BE has to be eliminated.
    """
    def __init__(self,
            in_file,
            exception_handler,
            bug_handler,
            copy = None,
            run_level = 1,
            ):
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
        self.__exception_handler = exception_handler
        self.__bug_handler = bug_handler
        self.__state = 'outside'
        self.__utf_exp = re.compile(r'&#x(.*?);')
    def __process_token(self, line):
        if self.__state == 'outside':
            if line[:5] == 'tx<ut':
                self.__handle_unicode(line)
            else:
                self.__write_obj.write(line)
        elif self.__state == 'after':
            if line[:5] == 'tx<hx':
                pass
            elif line[:5] == 'tx<ut':
                self.__handle_unicode(line)
            else:
                self.__state = 'outside'
                self.__write_obj.write(line)
        else:
            raise 'should\'t happen'
    def __handle_unicode(self, line):
        token = line[16:]
        match_obj = re.search(self.__utf_exp, token)
        if match_obj:
            uni_char = match_obj.group(1)
            dec_num = int(uni_char, 16)
            if dec_num > 57343 and dec_num < 63743:
                self.__state = 'outside'
            else:
                self.__write_obj.write(line)
                self.__state = 'after'
        else:
            self.__write_obj.write(line)
            self.__state = 'outside'
    def correct_unicode(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time.
        """
        read_obj = open(self.__file, 'r')
        self.__write_obj = open(self.__write_to, 'w')
        line_to_read = 1
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
            self.__token_info = line[:16]
            self.__process_token(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "correct_unicode.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
--- a/Show More
+++ b/Show More