Pull from trunk

Kovid Goyal 2010-07-09 21:54:24 -06:00
commit 8e46a15325
217 changed files with 91880 additions and 69270 deletions

View File

@ -4,6 +4,391 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.8
date: 2010-07-09
new features:
- title: "New tool to help prepare EPUBs for publication"
type: major
description: >
"calibre now contains a new command line tool called epub-fix that can automatically fix
common problems in EPUB files that cause them to be rejected by poorly designed publishing services.
The tool is plugin based for extensible functionality in the future. Currently, it can fix unmanifested files
and work around the date and SVG preserveAspectRatio bugs of epubcheck."
- title: "New icons for the toolbar buttons by Kamil Tatara"
- title: "Display rating (when available) in cover browser"
- title: "Clicking on the central cover int the cover browser now opens that book in the viewer"
- title: "Use the status bar instead of the area to the right of the location view to display status information"
- title: "Driver for the Pandigital Novel e-book reader"
bug fixes:
- title: "News download: Don not specify a font family for article descriptions"
- title: "News download: Fix regression introduced in 0.7.0 that broke download of some embedded content feeds"
- title: "MOBI Output: Partial support for nested superscript and subscripts."
tickets: [6132]
- title: "CHM Input: Fix handling of buggy CHM files with no .hhc"
tickets: [6087]
- title: "EPUB Input: Fix bug in unzipping EPUB files that have been zipped in depth first order."
tickets: [6127]
- title: "TXT Input: Convert HTML entities to characters."
tickets: [6114]
- title: "LRF Input: Handle LRF files with random null bytes in the text"
tickets: [6097]
- title: "Kobo driver: Fix detection of txt/html files on the device"
- title: "Fix opening of books when calibre library is on an unmapped network share in windows"
- title: "SONY driver: Only update the timestamp in the XML db for newly added books"
- title: "Cover browser: Fix rendering of center cover when width of cover browser is less than the width of a single cover"
- title: "Cover browser: Correct fix for setPixel out of bounds warning causing UI slowdown in calibre"
new recipes:
- title: "evz.ro"
author: Darko Miletic
- title: "Anchorage Daily News, China Economic Net, BBC Chinese and Singtao Daily"
author: rty
- title: Big Oven
author: Starson17
improved recipes:
- Haaretz
- Editor and Publisher
- Estadao
- version: 0.7.7
date: 2010-07-02
new features:
- title: "Support for the Nokia E52"
- title: "Searching on the size column"
- title: "iTunes driver: Add option to disable cover fetching for speeding up the fetching of large book collections"
bug fixes:
- title: "SONY driver: Only update metadata when books are sent to device."
- title: "TXT Input: Ensure the generated html is splittable"
tickets: [5904]
- title: "Fix infinite loop in default cover generation."
tickets: [6061]
- title: "HTML Input: Fix a parsing bug that was triggered in rare conditions"
tickets: [6064]
- title: "HTML2Zip plugin: Do not replace ligatures"
tickets: [6019]
- title: "iTunes driver: Fix transmission of non integral series numbers"
tickets: [6046]
- title: "Simplify implementation of cover caching and ensure cover browser is updated when covers are changed"
- title: "PDF metadata: Fix last character corrupted when setting metadata in encrypted files."
- title: "PDF metadata: Update the version of PoDoFo used to set metadata to 0.8.1. Hopefully that means more PDF files will work"
- title: "Device drivers: Speedup for dumping metadata cache to devices on Windows XP"
- title: "EPUB Output: Ensure that language setting is conformant to the specs"
- title: "MOBI Output: Fix a memory leak and a crash in the palmdoc compression routine"
- title: "Metadata download: Fix a regression that resulted in a failed download for some books"
new recipes:
- title: "Foreign Policy and Alo!"
author: Darko Miletic
- title: Statesman and ifzm
author: rty
improved recipes:
- Akter
- The Old New Thing
- version: 0.7.6
date: 2010-06-28
new features:
- title: "Add support for the new firmware of the Azbooka"
tickets: [5994]
- title: "A few speedups for calibre startup, should add up to a few seconds of startup time on slower machines"
- title: "Support for the Sweem MM300"
- title: "Add keyboard shorcut for Download metadata and covers"
bug fixes:
- title: "Fix regression in 0.7.5 that broke conversion of malformed HTML files (like those Microsoft Word outputs)"
type: major
tickets: [5991]
- title: "Don't download tags from librarything, as the tagging there is not very good"
- title: "Add mimetype for FB2 so that it can be served by the content server"
tickets: [6011]
- title: "Ensure cover is not resized to less than the available space in the Edit Meta Information dialog"
tickets: [6001]
- title: "SONY driver: Only update collections when sending book to device for the first time"
- title: "calibre should now work on windows when the location for the library contains non-ascii characters"
tickets: [5983]
- title: "Cover browser once again distorts instead of cropping covers that have an incorrect aspect ratio"
- title: "ISBNDb metadata plugin: Fix bug causing only first page of results to be fetched"
- title: "Move iTunes driver to the bottom so that it doesn't interfere with device detection for people that have iphones and an ereader plugged in"
improved recipes:
- Houston Chronicle
- Hindu
- Times of India
- New York Times
new recipes:
- title: Winnipeg Sun
author: rty
- version: 0.7.5
date: 2010-06-25
new features:
- title: "New driver for the Kobo featuring closer integration with the device."
- title: "Support for the Dell Streak, Eken Android tablet and the Astak Mentor EB600"
- title: "New series type custom column"
- title: "Add option in Send to device menu to connect to iTunes without any iDevice (experimental)"
- title: "iPad driver: Make setting iTunes Category from series optional. News download now optimizations for iPad output."
- title: "Add option to disable book cover animation"
tickets: [5909]
- title: "Edit meta information dialog: Remember last used size and splitter position."
tickets: [5908]
- title: "Metadata download: If any results have a published date, ensure they all do"
- title: "SONY driver: Add a preference setting in Preferences->Add/Save->Send to device to control how colelctions are managed on the device by calibre"
- title: "Metadata download: Filter out non book results. Also sort results by availability of covers for the isbn"
tickets: [5946]
- title: "Bulk editing for device collections in the device view via the context menu"
bug fixes:
- title: "When converting books using the calibre GUI, set the language of the output book to be the same as the language of the User Interface, instead of undefined. Fixes use of dictionary in iBooks"
- title: "PDF Output: Fix setting top/bottom margnis has no effect"
- title: "Conversion pipeline: Fix typo causing remove footer regex to always fail"
- title: "Handle device being yanked with queued device jobs gracefully"
- title: "Conversion pipeline: Handle deeply nested XML structures"
tickets: [5931]
- title: "Conversion pipeline: Fix handling of lists with a specified left margin"
tickets: [5877]
- title: "Restore workaround for ADE buggy rendering of anchors as links. However, make it overridable by extra CSS"
- title: "Fix LibraryThing metadata download plugin"
- title: "Fix multiple ratings displayed in Tag Browser for some legacy databases"
- title: "Fix invocation of postprocess file type plugins plugins"
- title: "HTML Input: Handle @import directives in linked css files."
tickets: [5135]
- title: "HTML Input: Handle absolute paths in resource links on windows correctly."
tickets: [3031]
- title: "E-book viewer: Handle font-face rules specify multiple families to be substituted"
- title: "Cover browser: Set aspect ratio of covers to 3:4 instead of 2:3. Crop rather than distort covers whoose aspect ratio is different from this. Antialias the rendering of the central cover"
- title: "Reset Tag browser if the text in the search box is edited"
- title: "Fix detection of SD card in Samsung Galaxy windows driver"
new recipes:
- title: "L'Osservatore Romano"
author: Darko Miletic
- title: China Press, London Free Press, People Daily
author: rty
improved recipes:
- Zaobao
- New Scientist
- National Post
- London review of books
- version: 0.7.4
date: 2010-06-19
bug fixes:
- title: "Fix regression in 0.7.3 that broke creating custom columns of rating or text types"
- title: "Fix cover browser breaking if you click on a book in the book list while cover browser is animated"
- title: "Fix a bug that could be triggered with the new book details pane if a book has a zero size cover"
tickets: [5889]
- title: "SONY driver: Fix bug preventing the editing of collections in the device view"
new recipes:
- title: Auto Prove
author: Gabriele Marini
- title: Forbes India, Maximum PC, Today Online
author: rty
improved recipes:
- WSJ
- Psychology Today
- version: 0.7.3
date: 2010-06-18
new features:
- title: "The Tag Browser now display an average rating for each item"
type: major
description: >
"
The icons of each individual item in the Tag Browser are now partially colored to indicate the average rating of
all books belonging to that category. For example, the icon next to each author is partially colored based on the
average rating of all books by that author in your calibre library. You can also hover your mouse over the item to
see the average rating in a tooltip. Can be turned off via Preferences->Interface
"
- title: "Editable author sort for each author"
type: major
description: >
"calibre has always allowed you to specify the author sort for each bookin your collection. Now you
can also specify the way the name of each individual author should be sorted. This is used to display the list
of authors in the Tag Browser and OPDS feeds in the Content Server"
- title: "When downloading metadata, also get series information from librarything.com"
type: major
tickets: [5148]
- title: "Redesign of the Book Details pane"
type: major
description: >
"The Book details pane now display covers with animation. Also instead of showing the full path to the book, you now have
clickable links to open the containing folder or individual formats. The path information is still accessible via a tooltip"
- title: "New User Interface layouts"
type: major
description: >
"calibre now has two user interface layouts selectable from Preferences->Interface. The 'wide' layout has the book details pane on the side
and the 'narrow' layout has it on the bottom. The default layout is now wide."
- title: "You can now add books directly from the device to the calibre library by right clicking on the books in the device views"
- title: "iPad driver: Create category from series preferentially, also handle series sorting"
- title: "SONY driver: Add an option to use author_sort instead of author when sending to device"
- title: "Hitting Enter in the search box now causes the search to be re-run"
tickets: [5856]
- title: "Boox driver: Make destination directory for books customizable"
- title: "Add plugin to download metadata from douban.com. Disabled by default."
- title: "OS X/linux driver for PocketBook 301"
- title: "Support for the Samsung Galaxy and Sigmatek EBK52"
- title: "On startup do not focus the search bar. Instead you can acces the search bar easily by pressing the / key or the standard search keyboard shortcut for your operating system"
bug fixes:
- title: "iPad driver: Various bug fixes"
- title: "Kobo Output profile: Adjust the screen dimensions when converting comics"
- title: "Fix using Preferences when a device is connected causes items in device menu to be disabled"
- title: "CHM Input: Skip files whoose names are too long for windows"
- title: "Brighten up calibre icon on dark backgrounds"
- title: "Ignore 'Unknown' in title/autors when downloading metadata"
tickets: [5633]
- title: "Fix regression that broke various entries in the menus - Preferences, Open containing folder and Edit metadata individually"
- title: "EPUB metadata: Handle comma separated entries in <dc:subject> tags correctly"
tickets: [5855]
- title: "MOBI Output: Fix underlines not being rendered"
tickets: [5830]
- title: "EPUB Output: Remove workaround for old versions of Adobe Digital Editions' faulty rendering of links in html. calibre no longer forces links to be blue and underlined"
- title: "Fix a bug that could cause the show pane buttons to not show hidden panes"
- title: "Fix Tag Editor does not reflect recently changed data in Tag Catagory Text Box"
tickets: [5809]
- title: "Content server: Fix sorting of books by authors instead of author_sort in the main and mobile views"
- title: "Cover cache: Resize covers larger than 600x800 in the cover cache to reduce memory consumption in the GUI"
- title: "EPUB Output: Default cover is generated is now generated as a JPEG instead of PNG32, reducing size by an order of magnitude."
tickets: [5810]
- title: "Cover Browser: Scale text size with height of cover browser. Only show a reflection of half the cover. Also restore rendering quality after regression in 0.7.1"
tickets: [5808]
- title: "Book list: Do not let the default layout have any column wider than 350 pixels"
new recipes:
- title: Akter
author: Darko Miletic
- title: Thai Rath and The Nation (Thailand)
author: Anat Ruangrassamee
improved recipes:
- Wall Street Journal
- New York Times
- Slashdot
- Publico
- Danas
- version: 0.7.2
date: 2010-06-11
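The 0.7.8 entry above says the new epub-fix tool can, among other things, repair unmanifested files, i.e. files that are present inside the EPUB archive but not listed in the OPF manifest. As a rough, generic sketch of what detecting such files involves (a minimal illustration, not calibre's implementation; the file name in the usage comment is hypothetical):

import posixpath
import zipfile
from xml.etree import ElementTree as ET

CONTAINER = 'META-INF/container.xml'
CNS = '{urn:oasis:names:tc:opendocument:xmlns:container}'
OPF = '{http://www.idpf.org/2007/opf}'

def unmanifested_files(epub_path):
    # Return archive members that the OPF manifest does not declare
    with zipfile.ZipFile(epub_path) as zf:
        # META-INF/container.xml points at the OPF package document
        container = ET.fromstring(zf.read(CONTAINER))
        opf_name = container.find(CNS + 'rootfiles/' + CNS + 'rootfile').get('full-path')
        opf_dir = posixpath.dirname(opf_name)
        opf = ET.fromstring(zf.read(opf_name))
        manifested = set()
        for item in opf.iter(OPF + 'item'):
            manifested.add(posixpath.normpath(posixpath.join(opf_dir, item.get('href'))))
        ignore = {'mimetype', opf_name}
        return [name for name in zf.namelist()
                if not name.endswith('/')
                and not name.startswith('META-INF/')
                and name not in ignore
                and posixpath.normpath(name) not in manifested]

# Usage with a hypothetical file: print(unmanifested_files('book.epub'))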

6 image files changed or added (diffs suppressed in this view)

View File

@ -1752,7 +1752,7 @@
sodipodi:cy="93.331604" sodipodi:cy="93.331604"
sodipodi:cx="-166.53223" sodipodi:cx="-166.53223"
id="path6082" id="path6082"
style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)" style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
sodipodi:type="arc" /></clipPath><radialGradient sodipodi:type="arc" /></clipPath><radialGradient
inkscape:collect="always" inkscape:collect="always"
xlink:href="#linearGradient5990" xlink:href="#linearGradient5990"
@ -2513,7 +2513,7 @@
transform="matrix(-1.7332269,0,0,1.7332269,-228.13814,-101.76485)" transform="matrix(-1.7332269,0,0,1.7332269,-228.13814,-101.76485)"
clip-path="none" /><path clip-path="none" /><path
sodipodi:type="arc" sodipodi:type="arc"
style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)" style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
id="path3915" id="path3915"
sodipodi:cx="-166.53223" sodipodi:cx="-166.53223"
sodipodi:cy="93.331604" sodipodi:cy="93.331604"
@ -2901,22 +2901,8 @@
id="g133"> id="g133">
<defs <defs
id="defs135" /> id="defs135" />
<use
id="use138"
x="0"
y="0"
width="121"
height="120" />
<clipPath <clipPath
id="XMLID_215_"> id="XMLID_215_">
<use
id="use141"
x="0"
y="0"
width="121"
height="120" />
</clipPath> </clipPath>
<g <g
clip-path="url(#XMLID_215_)" clip-path="url(#XMLID_215_)"



2 image files changed or added (diffs suppressed in this view)

resources/images/help.svg (new file, 269 lines)
View File

@ -0,0 +1,269 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 12.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 51448) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
version="1.0"
id="Livello_1"
width="128"
height="128"
viewBox="0 0 139 139"
overflow="visible"
enable-background="new 0 0 139 139"
xml:space="preserve"
sodipodi:version="0.32"
inkscape:version="0.45+devel"
sodipodi:docname="system-help.svgz"
inkscape:output_extension="org.inkscape.output.svgz.inkscape"
style="overflow:visible"><metadata
id="metadata3164"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /></cc:Work></rdf:RDF></metadata><defs
id="defs3162"><filter
inkscape:collect="always"
x="-0.132641"
width="1.265282"
y="-0.34752154"
height="1.6950431"
id="filter3547"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.7512044"
id="feGaussianBlur3549" /></filter><filter
inkscape:collect="always"
id="filter5097"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.32"
id="feGaussianBlur5099" /></filter><filter
inkscape:collect="always"
x="-0.143268"
width="1.286536"
y="-0.072184406"
height="1.1443688"
id="filter5125"><feGaussianBlur
inkscape:collect="always"
stdDeviation="1.91024"
id="feGaussianBlur5127" /></filter></defs><sodipodi:namedview
inkscape:window-height="697"
inkscape:window-width="1024"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
guidetolerance="10.0"
gridtolerance="10.0"
objecttolerance="10.0"
borderopacity="1.0"
bordercolor="#666666"
pagecolor="#ffffff"
id="base"
inkscape:zoom="2.9352518"
inkscape:cx="99.496726"
inkscape:cy="69.329657"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:current-layer="Livello_1"
height="128px"
width="128px" />
<filter
id="AI_Sfocatura_4">
<feGaussianBlur
stdDeviation="4"
id="feGaussianBlur3096" />
</filter>
<filter
id="AI_Sfocatura_2">
<feGaussianBlur
stdDeviation="2"
id="feGaussianBlur3099" />
</filter>
<radialGradient
id="XMLID_12_"
cx="69.600098"
cy="69.576698"
r="58"
gradientTransform="matrix(1,0,0,-0.1823,0,134.8566)"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
style="stop-color:#000000"
id="stop3102" />
<stop
offset="1"
style="stop-color:#000000;stop-opacity:0;"
id="stop3104" />
</radialGradient>
<circle
sodipodi:ry="58"
sodipodi:rx="58"
sodipodi:cy="69.599998"
sodipodi:cx="69.599998"
style="opacity:0.7;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter5097)"
id="circle5091"
r="58"
cy="69.599998"
cx="69.599998"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" /><ellipse
cx="69.599998"
cy="122.173"
rx="58"
ry="10.573"
id="ellipse3106"
style="opacity:0.6;fill:url(#XMLID_12_)"
sodipodi:cx="69.599998"
sodipodi:cy="122.173"
sodipodi:rx="58"
sodipodi:ry="10.573"
transform="translate(-9.9998474e-2,1.9102535)" />
<radialGradient
id="XMLID_13_"
cx="69.600098"
cy="69.600098"
r="58"
gradientUnits="userSpaceOnUse">
<stop
offset="0.6154"
style="stop-color:#EEEEEE"
id="stop3113" />
<stop
offset="0.8225"
style="stop-color:#DDDDDD"
id="stop3115" />
<stop
offset="1"
style="stop-color:#FFFFFF"
id="stop3117" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
id="circle3119"
style="fill:url(#XMLID_13_)"
sodipodi:cx="69.599998"
sodipodi:cy="69.599998"
sodipodi:rx="58"
sodipodi:ry="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" />
<linearGradient
id="XMLID_14_"
gradientUnits="userSpaceOnUse"
x1="27.6001"
y1="69.600098"
x2="111.6001"
y2="69.600098"
gradientTransform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<stop
offset="0"
style="stop-color:#2A94EC"
id="stop3122" />
<stop
offset="1"
style="stop-color:#0057AE"
id="stop3124" />
</linearGradient>
<path
d="M 26.062502,67.328127 C 26.062502,92.477355 46.522651,112.9375 71.671877,112.9375 C 96.821104,112.9375 117.28125,92.477355 117.28125,67.328127 C 117.28125,42.178901 96.821104,21.718753 71.671877,21.718753 C 46.522651,21.718753 26.062502,42.178901 26.062502,67.328127 z"
id="path3126"
style="fill:url(#XMLID_14_)" />
<g
id="circle22111"
cy="92"
rx="36"
ry="36"
cx="343.99899"
enable-background="new "
style="opacity:0.3;filter:url(#filter3547)"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<path
d="M 77.041,104.759 C 63.767,106.115 50.122,103.11 46.565,98.042 C 43.007,92.976 50.885,87.768 64.16,86.41 C 77.434,85.054 91.079,88.058 94.637,93.126 C 98.193,98.194 90.315,103.401 77.041,104.759 z"
id="path3129"
style="fill:#a8dde0" />
</g>
<linearGradient
id="circle16776_1_"
gradientUnits="userSpaceOnUse"
x1="135.5601"
y1="417.66461"
x2="161.87621"
y2="417.66461"
gradientTransform="matrix(0,1.7280523,1.7280523,0,-650.07477,-218.71693)">
<stop
offset="0"
style="stop-color:#FFFFFF"
id="stop3132" />
<stop
offset="1"
style="stop-color:#ffffff;stop-opacity:0;"
id="stop3134" />
</linearGradient>
<path
id="circle16776"
enable-background="new "
d="M 71.671877,24.06655 C 50.288682,24.06655 32.41958,38.77123 28.113838,58.349597 C 36.698174,66.142284 52.986151,54.358777 71.671877,54.358777 C 90.357604,54.358777 106.64666,66.142284 115.22991,58.349597 C 110.92417,38.77123 93.056158,24.06655 71.671877,24.06655 z"
style="opacity:0.8;fill:url(#circle16776_1_)" />
<g
id="g3137"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<defs
id="defs3139"><path
id="XMLID_10_"
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z" /></defs>
<clipPath
id="XMLID_6_">
<use
xlink:href="#XMLID_10_"
id="use3143"
x="0"
y="0"
width="139"
height="139" />
</clipPath>
<g
clip-path="url(#XMLID_6_)"
id="g3145"
style="filter:url(#AI_Sfocatura_2)">
<path
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z"
id="path3147"
style="fill:none;stroke:#00316e;stroke-width:2" />
</g>
</g>
<g
transform="matrix(1.0859375,0,0,1.1113796,-3.201342,-9.3177223)"
id="g5119"
style="fill:#00316e;filter:url(#filter5125)"><path
style="fill:#00316e"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
id="path5121" /><circle
style="fill:#00316e"
sodipodi:ry="8"
sodipodi:rx="8"
sodipodi:cy="93.599998"
sodipodi:cx="69.599998"
cx="69.599998"
cy="93.599998"
r="8"
id="circle5123" /></g><g
id="g5101"
transform="matrix(1.0859375,0,0,1.0859375,-3.201342,-8.2531233)"><path
id="path3157"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
style="fill:#ffffff" /><circle
id="circle3159"
r="8"
cy="93.599998"
cx="69.599998"
sodipodi:cx="69.599998"
sodipodi:cy="93.599998"
sodipodi:rx="8"
sodipodi:ry="8"
style="fill:#ffffff" /></g>
</svg>


11 image and binary files changed or added (diffs suppressed in this view)

View File

@ -15,7 +15,7 @@ class Akter(BasicNewsRecipe):
    category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
    oldest_article = 8
    max_articles_per_feed = 100
-   no_stylesheets = False
+   no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
@ -23,9 +23,9 @ class Akter(BasicNewsRecipe):
    publication_type = 'magazine'
    remove_empty_feeds = True
    PREFIX = 'http://www.akter.co.rs'
-   extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+   extra_css = """
    @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
-   .article_description,body,.lokacija{font-family: Arial,Helvetica,sans1,sans-serif}
+   .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
    .color-2{display:block; margin-bottom: 10px; padding: 5px, 10px;
    border-left: 1px solid #D00000; color: #D00000}
    img{margin-bottom: 0.8em} """

View File

@ -0,0 +1,65 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.alo.rs
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Alo_Novine(BasicNewsRecipe):
title = 'Alo!'
__author__ = 'Darko Miletic'
description = "News Portal from Serbia"
publisher = 'Alo novine d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
delay = 4
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'sr'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
.lead {font-size: 1.3em}
h1{color: #DB0700}
.article_uvod{font-style: italic; font-size: 1.2em}
img{margin-bottom: 0.8em} """
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name=['object','link','embed'])]
remove_attributes = ['height','width']
feeds = [
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
,(u'Politika' , u'http://www.alo.rs/rss/politika')
,(u'Vesti' , u'http://www.alo.rs/rss/vesti')
,(u'Sport' , u'http://www.alo.rs/rss/sport')
,(u'Ljudi' , u'http://www.alo.rs/rss/ljudi')
,(u'Saveti' , u'http://www.alo.rs/rss/saveti')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
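# The article id is the next-to-last path segment of the URL; use it to request the print-friendly template.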
artl = url.rpartition('/')[0]
artid = artl.rpartition('/')[2]
return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid
def image_url_processor(self, baseurl, url):
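# Some feed items contain a doubled slash in image URLs; collapse it before downloading.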
return url.replace('alo.rs//','alo.rs/')

View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
title = u'Anchorage Daily News'
__author__ = 'rty'
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'),
(u'Business', u'http://www.adn.com/money/index.xml'),
(u'Sports', u'http://www.adn.com/sports/index.xml'),
(u'Politics', u'http://www.adn.com/politics/index.xml'),
(u'Lifestyles', u'http://www.adn.com/life/index.xml'),
(u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
]
description = ''''Alaska's Newspaper'''
publisher = 'http://www.adn.com'
category = 'news, Alaska, Anchorage'
language = 'en'
extra_css = '''
p{font-weight: normal;text-align: justify}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'latin-1'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
keep_only_tags = [
dict(name='div', attrs={'class':'left_col story_mainbar'}),
]
remove_tags = [
dict(name='div', attrs={'class':'story_tools'}),
dict(name='p', attrs={'class':'ad_label'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'advertisement'}),
]

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'GabrieleMarini, based on Darko Miletic'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
__version__ = 'v1.02 Marini Gabriele '
__date__ = '10, January 2010'
__description__ = 'Italian daily newspaper'
'''
http://www.corrieredellosport.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AutoPR(BasicNewsRecipe):
__author__ = 'Gabriele Marini'
description = 'Auto and Formula 1'
cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'
title = u'Auto Prove'
publisher = 'CONTE Editore'
category = 'Sport'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 60
max_articles_per_feed = 20
use_embedded_content = False
recursion = 100
remove_javascript = True
no_stylesheets = True
#html2lrf_options = [
# '--comment', description
# , '--category', category
# , '--publisher', publisher
# , '--ignore-tables'
# ]
#html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [
dict(name='h2', attrs={'class':['tit_Article y_Txt']}),
dict(name='h2', attrs={'class':['tit_Article']}),
dict(name='div', attrs={'class':['box_Img newsdet_new ']}),
dict(name='div', attrs={'class':['box_Img newsdet_as ']}),
dict(name='table', attrs={'class':['table_A']}),
dict(name='div', attrs={'class':['txt_Article txtBox_cms']}),
dict(name='testoscheda')]
def parse_index(self):
feeds = []
for title, url in [
("Prove su Strada" , "http://www.auto.it/rss/prove+6.xml")
]:
soup = self.index_to_soup(url)
soup = soup.find('channel')
print soup
for article in soup.findAllNext('item'):
title = self.tag_to_string(article.title)
date = self.tag_to_string(article.pubDate)
description = self.tag_to_string(article.description)
link = self.tag_to_string(article.guid)
# print article
articles = self.create_links_append(link, date, description)
if articles:
feeds.append((title, articles))
return feeds
def create_links_append(self, link, date, description):
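# Each road test is split across several sub-pages (design, interni, tecnica, ...); build one article entry per sub-page by rewriting the base 'scheda' link.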
current_articles = []
current_articles.append({'title': 'Generale', 'url': link,'description':description, 'date':date}),
current_articles.append({'title': 'Design', 'url': link.replace('scheda','design'),'description':'scheda', 'date':''}),
current_articles.append({'title': 'Interni', 'url': link.replace('scheda','interni'),'description':'Interni', 'date':''}),
current_articles.append({'title': 'Tecnica', 'url': link.replace('scheda','tecnica'),'description':'Tecnica', 'date':''}),
current_articles.append({'title': 'Su Strada', 'url': link.replace('scheda','su_strada'),'description':'Su Strada', 'date':''}),
current_articles.append({'title': 'Pagella', 'url': link.replace('scheda','pagella'),'description':'Pagella', 'date':''}),
current_articles.append({'title': 'Rilevamenti', 'url': link.replace('scheda','telemetria'),'description':'Rilevamenti', 'date':''})
return current_articles

View File

@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
title = u'BBC Chinese'
oldest_article = 7
max_articles_per_feed = 100
feeds = [
(u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
(u'\u4e24\u5cb8\u4e09\u5730', u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
(u'\u91d1\u878d\u8d22\u7ecf', u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
(u'\u7f51\u4e0a\u4e92\u52a8', u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
(u'\u97f3\u89c6\u56fe\u7247', u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
(u'\u5206\u6790\u8bc4\u8bba', u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
]
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'British Broadcasting Corporation'
description = 'BBC news in Chinese'
category = 'News, Chinese'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
keep_only_tags = [
dict(name='h1'),
dict(name='p', attrs={'class':['primary-topic','summary']}),
dict(name='div', attrs={'class':['bodytext','datestamp']}),
]

View File

@ -0,0 +1,64 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BigOven(BasicNewsRecipe):
title = 'BigOven'
__author__ = 'Starson17'
description = 'Recipes for the Foodie in us all. Registration is free. A fake username and password just gives smaller photos.'
language = 'en'
category = 'news, food, recipes, gourmet'
publisher = 'Starson17'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.software.com/images/products/BigOven%20Logo_177_216.JPG'
max_articles_per_feed = 30
needs_subscription = True
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
def get_browser(self):
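# Log in to bigoven.com with the configured credentials; per the description above, a registered login yields larger recipe photos.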
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.bigoven.com/')
br.select_form(name='form1')
br['TopMenu_bo1$email'] = self.username
br['TopMenu_bo1$password'] = self.password
br.submit()
return br
remove_attributes = ['style', 'font']
keep_only_tags = [dict(name='h1')
,dict(name='div', attrs={'class':'img'})
,dict(name='div', attrs={'id':'intro'})
]
remove_tags = [dict(name='div', attrs={'style':["overflow: visible;"]})
,dict(name='div', attrs={'class':['ctas']})
#,dict(name='a', attrs={'class':['edit']})
,dict(name='p', attrs={'class':['byline']})
]
feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]
def preprocess_html(self, soup):
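# Drop the blocks that hold 'edit' links and flatten 'deflink' anchors to plain text.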
for tag in soup.findAll(name='a', attrs={'class':['edit']}):
tag.parent.extract()
for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
tag.replaceWith(tag.string)
return soup
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:medium;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278162597(BasicNewsRecipe):
__author__ = 'rty'
title = u'China Economic Net'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'www.ce.cn - China Economic net - Beijing'
description = 'China Economic Net Magazine'
category = 'Economic News Magazine, Chinese, China'
feeds = [
(u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
(u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),
(u'Health 健康', u'http://www.ce.cn/health/index_6294.xml'),
(u'Technology 科技', u'http://sci.ce.cn/mainpage/index_6307.xml'),
(u'Domestic Politics 国内时政', u'http://www.ce.cn/xwzx/gnsz/index_6273.xml')
]
masthead_url = 'http://finance.ce.cn/images/08mdy_logo.gif'
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh-cn'
encoding = 'gb2312'
conversion_options = {'linearize_tables':True}
keep_only_tags = [
dict(name='h1', attrs={'id':'articleTitle'}),
dict(name='div', attrs={'class':'laiyuan'}),
dict(name='div', attrs={'id':'articleText'}),
]

View File

@ -0,0 +1,71 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277228948(BasicNewsRecipe):
title = u'China Press USA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'www.chinapressusa.com'
description = 'Overseas Chinese Network Newspaper in the USA'
category = 'News in Chinese, USA'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
#encoding = 'GB2312'
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url ='http://www.chinapressusa.com/common/images/logo.gif'
extra_css = '''
@font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {
margin-right: 8pt;
font-family: 'DroidFont', serif;}
h1 {font-family: 'DroidFont', serif, sans-serif}
.show {font-family: 'DroidFont', serif, sans-serif}
'''
feeds = [
(u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
(u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
(u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
]
keep_only_tags = [
dict(name='div', attrs={'class':'show'}),
]
remove_tags = [
# dict(name='table', attrs={'class':'xle'}),
dict(name='div', attrs={'class':'time'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bank17'}),
# dict(name='a', attrs={'class':'ab12'}),
]
def append_page(self, soup, appendtag, position):
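# Follow the 'displaypagenum' pager recursively and splice the body of each continuation page into the article.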
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':'show'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
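# Declare the zh-CN charset, strip inline styles, pull in any continuation pages, then remove the pager block.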
mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
self.append_page(soup, soup.body, 3)
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
pager.extract()
return soup

View File

@ -1,14 +1,29 @@
-import re
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2010 elsuave'
 from calibre.web.feeds.news import BasicNewsRecipe
 class EandP(BasicNewsRecipe):
     title = u'Editor and Publisher'
-    __author__ = u'Xanthan Gum'
+    __author__ = u'elsuave (modified from Xanthan Gum)'
     description = 'News about newspapers and journalism.'
+    publisher = 'Editor and Publisher'
+    category = 'news, journalism, industry'
     language = 'en'
-    no_stylesheets = True
-    oldest_article = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 25
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf8'
+    cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
+    remove_javascript = True
+    html2lrf_options = [
+        '--comment', description
+        , '--category', category
+        , '--publisher', publisher
+    ]
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
     # Font formatting code borrowed from kwetal
@ -18,17 +33,21 @ class EandP(BasicNewsRecipe):
     h2{font-size: large;}
     '''
-    # Delete everything before the article
-    remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
-    # Delete everything after the article
-    preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
-    lambda match: '</body>'),]
-    feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'),
-             (u'Business News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'),
-             (u'Ad/Circ News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'),
-             (u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'),
-             (u'Technology News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'),
-             (u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')]
+    # Keep only div:itemmgap
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'itemmgap'})
+    ]
+    # Remove commenting/social media links
+    remove_tags_after = [dict(name='div', attrs={'class':'clear'})]
+    feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
+             (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
+             (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
+             (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
+             (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]

View File

@ -1,7 +1,7 @@
 #!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010, elsuave'
 '''
 estadao.com.br
 '''
@ -10,12 +10,12 @ from calibre.web.feeds.news import BasicNewsRecipe
 class Estadao(BasicNewsRecipe):
     title = 'O Estado de S. Paulo'
-    __author__ = 'Darko Miletic'
+    __author__ = 'elsuave (modified from Darko Miletic)'
     description = 'News from Brasil in Portuguese'
     publisher = 'O Estado de S. Paulo'
     category = 'news, politics, Brasil'
     oldest_article = 2
-    max_articles_per_feed = 100
+    max_articles_per_feed = 25
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf8'
@ -30,13 +30,14 @ class Estadao(BasicNewsRecipe):
     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-    keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['bb-md-noticia','c5']})
+    ]
     remove_tags = [
         dict(name=['script','object','form','ul'])
-        ,dict(name='div', attrs={'id':['votacao','estadaohoje']})
-        ,dict(name='p', attrs={'id':'ctrl_texto'})
-        ,dict(name='p', attrs={'class':'texto'})
+        ,dict(name='div', attrs={'class':['fnt2 Color_04 bold','right fnt2 innerTop15 dvTmFont','™_01 right outerLeft15','tituloBox','tags']})
+        ,dict(name='div', attrs={'id':['bb-md-noticia-subcom']})
     ]
     feeds = [
@ -51,13 +52,12 @ class Estadao(BasicNewsRecipe):
     ,(u'Vida &', u'http://www.estadao.com.br/rss/vidae.xml')
     ]
-    def preprocess_html(self, soup):
-        ifr = soup.find('iframe')
-        if ifr:
-            ifr.extract()
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
     language = 'pt'
+    def get_article_url(self, article):
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if '/Multimidia/' not in url:
+            return url

View File

@ -0,0 +1,52 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
evz.ro
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class EVZ_Ro(BasicNewsRecipe):
title = 'evz.ro'
__author__ = 'Darko Miletic'
description = 'News from Romania'
publisher = 'evz.ro'
category = 'news, politics, Romania'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'ro'
masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
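# Reduce the page <head> to just its <title> element before parsing (see preprocess_regexps below).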
preprocess_regexps = [
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
]
remove_tags = [
dict(name=['form','embed','iframe','object','base','link','script','noscript'])
,dict(attrs={'class':['section','statsInfo','email il']})
,dict(attrs={'id' :'gallery'})
]
remove_tags_after = dict(attrs={'class':'section'})
keep_only_tags = [dict(attrs={'class':'single'})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,55 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1276934715(BasicNewsRecipe):
title = u'Forbes India'
__author__ = 'rty'
description = 'India Edition Forbes'
publisher = 'Forbes India'
category = 'Business News, Economy, India'
oldest_article = 7
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_IN'
temp_files = []
articles_are_obfuscated = True
conversion_options = {'linearize_tables':True}
feeds = [
(u'Contents', u'http://business.in.com/rssfeed/rss_all.xml'),
]
extra_css = '''
.t-10-gy-l{font-style: italic; font-size: small}
.t-30-b-d{font-weight: bold; font-size: xx-large}
.t-16-gy-l{font-weight: bold; font-size: x-large; font-style: italic}
.storycontent{font-size: 4px;font-family: Times New Roman;}
'''
remove_tags_before = dict(name='div', attrs={'class':'pdl10 pdr15'})
def get_obfuscated_article(self, url):
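# Articles are fetched via their print view: follow the /printcontent/ link and give calibre the saved copy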
br = self.get_browser()
br.open(url)
response = br.follow_link(url_regex = r'/printcontent/[0-9]+', nr = 0)
html = response.read()
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
def get_cover_url(self):
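# Use the first matching cover link on the magazine index page as the issue cover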
index = 'http://business.in.com/magazine/'
soup = self.index_to_soup(index)
for image in soup.findAll('a',{ "class" : "lbOn a-9-b-d" }):
return image['href']
#return image['href'] + '.jpg'
return None
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup

View File

@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.foreignpolicy.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ForeignPolicy(BasicNewsRecipe):
title = 'Foreign Policy'
__author__ = 'Darko Miletic'
description = 'International News'
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
category = 'news, politics, USA'
oldest_article = 31
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif } img{margin-bottom: 0.4em} h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(attrs={'id':['art-mast','art-body','auth-bio']})]
remove_tags = [dict(name='iframe'),dict(attrs={'id':['share-box','base-ad']})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.foreignpolicy.com/node/feed')]
def print_version(self, url):
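# Request the single-page, print-friendly rendition of each article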
return url + '?print=yes&page=full'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,56 +1,95 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
haaretz.com www.haaretz.com
''' '''
import re
from calibre import strftime
from time import gmtime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Haaretz_en(BasicNewsRecipe): class HaaretzPrint_en(BasicNewsRecipe):
title = 'Haaretz in English' title = 'Haaretz - print edition'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Haaretz.com, the online edition of Haaretz Newspaper in Israel, and analysis from Israel and the Middle East. Haaretz.com provides extensive and in-depth coverage of Israel, the Jewish World and the Middle East, including defense, diplomacy, the Arab-Israeli conflict, the peace process, Israeli politics, Jerusalem affairs, international relations, Iran, Iraq, Syria, Lebanon, the Palestinian Authority, the West Bank and the Gaza Strip, the Israeli business world and Jewish life in Israel and the Diaspora. ' description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
publisher = 'haaretz.com' publisher = 'Haaretz'
category = 'news, politics, Israel' category = "news, Haaretz, Israel news, Israel newspapers, Israel business news, Israel financial news, Israeli news,Israeli newspaper, Israeli newspapers, news from Israel, news in Israel, news Israel, news on Israel, newspaper Israel, Israel sports news, Israel diplomacy news"
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'en_IL' language = 'en_IL'
publication_type = 'newspaper' publication_type = 'newspaper'
remove_empty_feeds = True PREFIX = 'http://www.haaretz.com'
masthead_url = 'http://www.haaretz.com/images/logos/logoGrey.gif' masthead_url = PREFIX + '/images/logos/logoGrey.gif'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } ' extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } '
preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')]
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'tags' : category , 'tags' : category
, 'publisher' : publisher , 'publisher': publisher
, 'language' : language , 'language' : language
} }
remove_tags = [dict(name='div', attrs={'class':['rightcol']}),dict(name='table')] keep_only_tags = [dict(attrs={'id':'threecolumns'})]
remove_tags_before = dict(name='h1') remove_attributes = ['width','height']
remove_tags_after = dict(attrs={'id':'innerArticle'}) remove_tags = [
keep_only_tags = [dict(attrs={'id':'content'})] dict(name=['iframe','link','object','embed'])
,dict(name='div',attrs={'class':'rightcol'})
]
feeds = [ feeds = [
(u'Opinion' , u'http://www.haaretz.com/cmlink/opinion-rss-1.209234?localLinksEnabled=false' ) (u'News' , PREFIX + u'/print-edition/news' )
,(u'Defense and diplomacy' , u'http://www.haaretz.com/cmlink/defense-and-diplomacy-rss-1.208894?localLinksEnabled=false') ,(u'Opinion' , PREFIX + u'/print-edition/opinion' )
,(u'National' , u'http://www.haaretz.com/cmlink/national-rss-1.208896?localLinksEnabled=false' ) ,(u'Business' , PREFIX + u'/print-edition/business' )
,(u'International' , u'http://www.haaretz.com/cmlink/international-rss-1.208898?localLinksEnabled=false' ) ,(u'Real estate' , PREFIX + u'/print-edition/real-estate' )
,(u'Jewish World' , u'http://www.haaretz.com/cmlink/jewish-world-rss-1.209085?localLinksEnabled=false' ) ,(u'Sports' , PREFIX + u'/print-edition/sports' )
,(u'Business' , u'http://www.haaretz.com/cmlink/business-print-rss-1.264904?localLinksEnabled=false' ) ,(u'Travel' , PREFIX + u'/print-edition/travel' )
,(u'Real Estate' , u'http://www.haaretz.com/cmlink/real-estate-print-rss-1.264977?localLinksEnabled=false' ) ,(u'Books' , PREFIX + u'/print-edition/books' )
,(u'Features' , u'http://www.haaretz.com/cmlink/features-print-rss-1.264912?localLinksEnabled=false' ) ,(u'Food & Wine' , PREFIX + u'/print-edition/food-wine' )
,(u'Arts and leisure' , u'http://www.haaretz.com/cmlink/arts-and-leisure-rss-1.286090?localLinksEnabled=false' ) ,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure' )
,(u'Books' , u'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false' ) ,(u'Features' , PREFIX + u'/print-edition/features' )
,(u'Food and Wine' , u'http://www.haaretz.com/cmlink/food-and-wine-print-rss-1.265034?localLinksEnabled=false' )
,(u'Sports' , u'http://www.haaretz.com/cmlink/sports-rss-1.286092?localLinksEnabled=false' )
] ]
def print_version(self, url):
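# Map an article URL to its print page using the trailing article id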
article = url.rpartition('/')[2]
return 'http://www.haaretz.com/misc/article-print-page/' + article
def parse_index(self):
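# Build the issue by scraping each print-edition section page listed in feeds and collecting its article links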
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll(attrs={'class':'text'}):
sp = item.find('span',attrs={'class':'h3 font-weight-normal'})
desc = item.find('p')
description = ''
if sp:
if desc:
description = self.tag_to_string(desc)
link = sp.a
url = self.PREFIX + link['href']
title = self.tag_to_string(link)
times = strftime('%a, %d %b %Y %H:%M:%S +0000',gmtime())
articles.append({
'title' :title
,'date' :times
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
import re import time
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class TheHindu(BasicNewsRecipe): class TheHindu(BasicNewsRecipe):
@ -10,45 +10,41 @@ class TheHindu(BasicNewsRecipe):
language = 'en_IN' language = 'en_IN'
oldest_article = 7 oldest_article = 7
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
remove_tags_before = {'name':'font', 'class':'storyhead'} keep_only_tags = [dict(id='content')]
preprocess_regexps = [ remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
(re.compile(r'<!-- story ends -->.*', re.DOTALL), dict(id=['email-section', 'right-column', 'printfooter'])]
lambda match: '</body></html>'),
] extra_css = '.photo-caption { font-size: smaller }'
extra_css = '''
.storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
'''
feeds = [
(u'Main - Front Page', u'http://www.hindu.com/rss/01hdline.xml'),
(u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
(u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
(u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
(u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
(u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
(u'Main - Weather / Religion / Crossword / Cartoon',
u'http://www.hindu.com/rss/10hdline.xml'),
(u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
(u'Supplement - Literary Review',
u'http://www.hindu.com/rss/lrhdline.xml'),
(u'Supplement - Sunday Magazine',
u'http://www.hindu.com/rss/maghdline.xml'),
(u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
(u'Supplement - Business Review',
u'http://www.hindu.com/rss/bizhdline.xml'),
(u'Supplement - Book Review',
u'http://www.hindu.com/rss/brhdline.xml'),
(u'Supplement - Science & Technology',
u'http://www.hindu.com/rss/setahdline.xml')
]
def postprocess_html(self, soup, first_fetch): def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td','center']): for t in soup.findAll(['table', 'tr', 'td','center']):
t.name = 'div' t.name = 'div'
return soup return soup
def parse_index(self):
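# Walk today's paper index: each h3 starts a new section, each div of class "tpaper" holds an article link (fetched with ?css=print)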
today = time.strftime('%Y-%m-%d')
soup = self.index_to_soup(
'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
div = soup.find(id='left-column')
feeds = []
current_section = None
current_articles = []
for x in div.findAll(['h3', 'div']):
if current_section and x.get('class', '') == 'tpaper':
a = x.find('a', href=True)
if a is not None:
current_articles.append({'url':a['href']+'?css=print',
'title':self.tag_to_string(a), 'date': '',
'description':''})
if x.name == 'h3':
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
if current_section and current_articles:
feeds.append((current_section, current_articles))
return feeds

View File

@ -1,12 +1,15 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import string, pprint
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe): class HoustonChronicle(BasicNewsRecipe):
title = u'The Houston Chronicle' title = u'The Houston Chronicle'
description = 'News from Houston, Texas' description = 'News from Houston, Texas'
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal'
language = 'en' language = 'en'
timefmt = ' [%a, %d %b, %Y]' timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True no_stylesheets = True
@ -38,54 +41,23 @@ class HoustonChronicle(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('http://www.chron.com/news/') categories = ['news', 'sports', 'business', 'entertainment', 'life',
container = soup.find('table', attrs={'class':'body-columns'}) 'travel']
feeds = [] feeds = []
current_section = 'Top Stories' for cat in categories:
current_articles = [] articles = []
soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
self.log('\tFound section:', current_section) for elem in soup.findAll(comptype='story', storyid=True):
a = elem.find('a', href=True)
for div in container.findAll('div'): if a is None: continue
if div.get('class', None) == 'module-mast': url = a['href']
t = self.tag_to_string(div).replace(u'\xbb', '').strip() if not url.startswith('http://'):
if t and 'interactives' not in t: url = 'http://www.chron.com'+url
if current_section and current_articles: articles.append({'title':self.tag_to_string(a), 'url':url,
feeds.append((current_section, current_articles)) 'description':'', 'date':''})
current_section = t pprint.pprint(articles[-1])
current_articles = [] if articles:
self.log('\tFound section:', current_section) feeds.append((string.capwords(cat), articles))
elif div.get('storyid', False):
a = div.find('a', href=True)
if a:
title = self.tag_to_string(a)
url = a.get('href')
if title and url:
if url.startswith('/'):
url = 'http://www.chron.com'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
current_articles.append({'title':title, 'url':url,
'date':'', 'description':''})
elif div.get('class', None) == 'columnbox' and \
'special' in current_section.lower():
a = div.find('a')
if a:
title = self.tag_to_string(a)
url = a.get('href')
if title and url:
if not url.startswith('/'): continue
url = 'http://www.chron.com'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
a.extract()
desc = self.tag_to_string(div)
current_articles.append({'title':title, 'url':url,
'date':'', 'description':desc})
if current_section and current_articles:
feeds.append((current_section, current_articles))
return feeds return feeds

View File

@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277305250(BasicNewsRecipe):
title = u'infzm - China Southern Weekly'
oldest_article = 14
max_articles_per_feed = 100
feeds = [(u'\u5357\u65b9\u5468\u672b-\u70ed\u70b9\u65b0\u95fb', u'http://www.infzm.com/rss/home/rss2.0.xml'),
(u'\u5357\u65b9\u5468\u672b-\u7ecf\u6d4e\u65b0\u95fb', u'http://www.infzm.com/rss/economic.xml'),
(u'\u5357\u65b9\u5468\u672b-\u6587\u5316\u65b0\u95fb', u'http://www.infzm.com/rss/culture.xml'),
(u'\u5357\u65b9\u5468\u672b-\u751f\u6d3b\u65f6\u5c1a', u'http://www.infzm.com/rss/lifestyle.xml'),
(u'\u5357\u65b9\u5468\u672b-\u89c2\u70b9', u'http://www.infzm.com/rss/opinion.xml')
]
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'http://www.infzm.com'
description = 'Chinese Weekly Tabloid'
category = 'News, China'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
#encoding = 'GB2312'
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://i50.tinypic.com/2qmfb7l.jpg'
extra_css = '''
@font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {
margin-right: 8pt;
font-family: 'DroidFont', serif;}
.detailContent {font-family: 'DroidFont', serif, sans-serif}
'''
keep_only_tags = [
dict(name='div', attrs={'id':'detailContent'}),
]
remove_tags = [
dict(name='div', attrs={'id':['detailTools', 'detailSideL', 'pageNum']}),
]
remove_tags_after = [
dict(name='div', attrs={'id':'pageNum'}),
]
def preprocess_html(self, soup):
for item in soup.findAll(color=True):
del item['color']
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class LondonFreePress(BasicNewsRecipe):
title = u'London Free Press'
__author__ = 'rty'
oldest_article = 4
max_articles_per_feed = 100
publisher = 'lfpress.com'
description = 'Ontario Canada Newspaper'
category = 'News, Ontario, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_CA'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
feeds = [
(u'News', u'http://www.lfpress.com/news/rss.xml'),
(u'Comment', u'http://www.lfpress.com/comment/rss.xml'),
(u'Entertainment', u'http://www.lfpress.com/entertainment/rss.xml'),
(u'Money', u'http://www.lfpress.com/money/rss.xml'),
(u'Life', u'http://www.lfpress.com/life/rss.xml'),
(u'Sports', u'http://www.lfpress.com/sports/rss.xml')
]
keep_only_tags = [
dict(name='div', attrs={'id':'article'}),
]
remove_tags = [
dict(name='div', attrs={'id':'commentsBottom'}),
dict(name='div', attrs={'class':['leftBox','bottomBox clear']}),
dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bottomBox clear'}),
]

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.vatican.va/news_services/or/or_quo
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LOsservatoreRomano_it(BasicNewsRecipe):
title = "L'Osservatore Romano"
__author__ = 'Darko Miletic'
description = 'Giornale quotidiano, politico, religioso del Vaticano'
publisher = 'La Santa Sede'
category = 'news, politics, religion, Vatican'
no_stylesheets = True
INDEX = 'http://www.vatican.va'
FEEDPAGE = INDEX + '/news_services/or/or_quo/index.html'
CONTENTPAGE = INDEX + '/news_services/or/or_quo/text.html'
use_embedded_content = False
encoding = 'cp1252'
language = 'it'
publication_type = 'newspaper'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
def parse_index(self):
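# The whole issue is served as a single text page, so the index is one entry pointing at CONTENTPAGE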
articles = []
articles.append({
'title' :self.title
,'date' :''
,'url' :self.CONTENTPAGE
,'description':''
})
return [(self.title, articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
lrb.co.uk lrb.co.uk
''' '''
@ -8,17 +8,20 @@ lrb.co.uk
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooks(BasicNewsRecipe): class LondonReviewOfBooks(BasicNewsRecipe):
title = u'London Review of Books' title = 'London Review of Books (free)'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, England' category = 'news, literature, UK'
publisher = 'London Review of Books' publisher = 'LRB ltd.'
oldest_article = 7 oldest_article = 15
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'en_GB' language = 'en_GB'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
publication_type = 'magazine'
masthead_url = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
@ -27,13 +30,16 @@ class LondonReviewOfBooks(BasicNewsRecipe):
,'publisher' : publisher ,'publisher' : publisher
} }
keep_only_tags = [dict(name='div' , attrs={'id' :'main'})] keep_only_tags = [dict(attrs={'class':['article-body indent','letters','article-list']})]
remove_tags = [ remove_attributes = ['width','height']
dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
,dict(name='div' , attrs={'id' :['mainmenu','precontent','otherarticles'] })
,dict(name='span', attrs={'class':['inlineright','article-icons']})
,dict(name='ul' , attrs={'class':'article-controls'})
,dict(name='p' , attrs={'class':'meta-info' })
]
feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')] feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup('http://www.lrb.co.uk/')
cover_item = soup.find('p',attrs={'class':'cover'})
if cover_item:
cover_url = 'http://www.lrb.co.uk' + cover_item.a.img['src']
return cover_url

View File

@ -0,0 +1,75 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lrb.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooksPayed(BasicNewsRecipe):
title = 'London Review of Books'
__author__ = 'Darko Miletic'
description = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, UK'
publisher = 'LRB Ltd.'
max_articles_per_feed = 100
language = 'en_GB'
no_stylesheets = True
delay = 1
use_embedded_content = False
encoding = 'utf-8'
INDEX = 'http://www.lrb.co.uk'
LOGIN = INDEX + '/login'
masthead_url = INDEX + '/assets/images/lrb_logo_big.gif'
needs_subscription = True
publication_type = 'magazine'
extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
def get_browser(self):
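# Log in with the subscriber credentials before any content is fetched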
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(nr=1)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def parse_index(self):
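# Follow the cover link on the home page to the current issue and list every article in its table of contents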
articles = []
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('p',attrs={'class':'cover'})
lrbtitle = self.title
if cover_item:
self.cover_url = self.INDEX + cover_item.a.img['src']
content = self.INDEX + cover_item.a['href']
soup2 = self.index_to_soup(content)
sitem = soup2.find(attrs={'class':'article-list'})
lrbtitle = soup2.head.title.string
for item in sitem.findAll('a',attrs={'class':'title'}):
description = u''
title_prefix = u''
feed_link = item
if feed_link.has_key('href'):
url = self.INDEX + feed_link['href']
title = title_prefix + self.tag_to_string(feed_link)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
return [(lrbtitle, articles)]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})]
remove_attributes = ['width','height']

View File

@ -0,0 +1,43 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1276930924(BasicNewsRecipe):
title = u'Maximum PC'
__author__ = 'rty'
description = 'Maximum PC'
publisher = 'http://www.maximumpc.com'
category = 'news, computer, technology'
language = 'en'
oldest_article = 30
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
temp_files = []
articles_are_obfuscated = True
feeds = [(u'News', u'http://www.maximumpc.com/articles/4/feed'),
(u'Reviews', u'http://www.maximumpc.com/articles/40/feed'),
(u'Editors Blog', u'http://www.maximumpc.com/articles/6/feed'),
(u'How-to', u'http://www.maximumpc.com/articles/32/feed'),
(u'Features', u'http://www.maximumpc.com/articles/31/feed'),
(u'From the Magazine', u'http://www.maximumpc.com/articles/72/feed')
]
keep_only_tags = [
dict(name='div', attrs={'class':['print-title','article_body']}),
]
remove_tags = [
dict(name='div', attrs={'class':'comments-tags-actions'}),
]
remove_tags_before = dict(name='div', attrs={'class':'print-title'})
remove_tags_after = dict(name='div', attrs={'class':'meta-content'})
def get_obfuscated_article(self, url):
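# Download the print version of each article (the /print/ link) into a temp file for calibre to parse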
br = self.get_browser()
br.open(url)
response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
html = response.read()
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name

View File

@ -7,18 +7,18 @@ class NYTimes(BasicNewsRecipe):
__author__ = 'Krittika Goyal' __author__ = 'Krittika Goyal'
description = 'Canadian national newspaper' description = 'Canadian national newspaper'
timefmt = ' [%d %b, %Y]' timefmt = ' [%d %b, %Y]'
needs_subscription = False
language = 'en_CA' language = 'en_CA'
needs_subscription = False
no_stylesheets = True no_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'}) #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'}) remove_tags_after = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name='iframe'),
dict(name='div', attrs={'class':'story-tools'}), dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
#dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}), #dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
#dict(name='form', attrs={'onsubmit':''}), #dict(name='form', attrs={'onsubmit':''}),
#dict(name='table', attrs={'cellspacing':'0'}), dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
] ]
# def preprocess_html(self, soup): # def preprocess_html(self, soup):
@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.nejm_get_index() soup = self.nejm_get_index()
div = soup.find(id='LegoText4') div = soup.find(id='npContentMain')
current_section = None current_section = None
current_articles = [] current_articles = []
@ -50,7 +50,7 @@ class NYTimes(BasicNewsRecipe):
current_section = self.tag_to_string(x) current_section = self.tag_to_string(x)
current_articles = [] current_articles = []
self.log('\tFound section:', current_section) self.log('\tFound section:', current_section)
if current_section is not None and x.name == 'h3': if current_section is not None and x.name == 'h5':
# Article found # Article found
title = self.tag_to_string(x) title = self.tag_to_string(x)
a = x.find('a', href=lambda x: x and 'story' in x) a = x.find('a', href=lambda x: x and 'story' in x)
@ -59,8 +59,8 @@ class NYTimes(BasicNewsRecipe):
url = a.get('href', False) url = a.get('href', False)
if not url or not title: if not url or not title:
continue continue
if url.startswith('story'): #if url.startswith('story'):
url = 'http://www.nationalpost.com/todays-paper/'+url url = 'http://www.nationalpost.com/todays-paper/'+url
self.log('\t\tFound article:', title) self.log('\t\tFound article:', title)
self.log('\t\t\t', url) self.log('\t\t\t', url)
current_articles.append({'title': title, 'url':url, current_articles.append({'title': title, 'url':url,
@ -70,28 +70,11 @@ class NYTimes(BasicNewsRecipe):
feeds.append((current_section, current_articles)) feeds.append((current_section, current_articles))
return feeds return feeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'class':'triline'}) story = soup.find(name='div', attrs={'id':'npContentMain'})
page2_link = soup.find('p','pagenav') ##td = heading.findParent(name='td')
if page2_link: ##td.extract()
atag = page2_link.find('a',href=True)
if atag:
page2_url = atag['href']
if page2_url.startswith('story'):
page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
elif page2_url.startswith( '/todays-paper/story.html'):
page2_url = 'http://www.nationalpost.com/'+page2_url
page2_soup = self.index_to_soup(page2_url)
if page2_soup:
page2_content = page2_soup.find('div','story-content')
if page2_content:
full_story = BeautifulSoup('<div></div>')
full_story.insert(0,story)
full_story.insert(1,page2_content)
story = full_story
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>') soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body') body = soup.find(name='body')
body.insert(0, story) body.insert(0, story)
return soup return soup

View File

@ -32,15 +32,16 @@ class NewScientist(BasicNewsRecipe):
} }
preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')] preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]
keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})] keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
remove_tags = [ remove_tags = [
dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]}) dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools']}) ,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools','comments','blgsocial']})
,dict(name='p' , attrs={'class':['marker','infotext' ]}) ,dict(name='p' , attrs={'class':['marker','infotext' ]})
,dict(name='meta' , attrs={'name' :'description' }) ,dict(name='meta' , attrs={'name' :'description' })
,dict(name='a' , attrs={'rel' :'tag' })
] ]
remove_tags_after = dict(attrs={'class':'nbpcopy'}) remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
remove_attributes = ['height','width'] remove_attributes = ['height','width']
feeds = [ feeds = [

View File

@ -17,6 +17,7 @@ class NYTimes(BasicNewsRecipe):
title = 'New York Times Top Stories' title = 'New York Times Top Stories'
__author__ = 'GRiker' __author__ = 'GRiker'
language = 'en' language = 'en'
requires_version = (0, 7, 5)
description = 'Top Stories from the New York Times' description = 'Top Stories from the New York Times'
# List of sections typically included in Top Stories. Use a keyword from the # List of sections typically included in Top Stories. Use a keyword from the
@ -64,6 +65,7 @@ class NYTimes(BasicNewsRecipe):
timefmt = '' timefmt = ''
needs_subscription = True needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article') remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article') remove_tags_after = dict(id='article')
@ -77,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule', 'doubleRule',
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module',
'icon enlargeThis', 'icon enlargeThis',
'leftNavTabs', 'leftNavTabs',
'module box nav', 'module box nav',
@ -86,6 +89,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule', 'relatedSearchesModule',
'side_tool', 'side_tool',
'singleAd', 'singleAd',
'subNavigation clearfix',
'subNavigation tabContent active', 'subNavigation tabContent active',
'subNavigation tabContent active clearfix', 'subNavigation tabContent active clearfix',
]}), ]}),
@ -108,6 +112,7 @@ class NYTimes(BasicNewsRecipe):
'navigation', 'navigation',
'portfolioInline', 'portfolioInline',
'relatedArticles', 'relatedArticles',
'respond',
'side_search', 'side_search',
'side_index', 'side_index',
'side_tool', 'side_tool',
@ -183,6 +188,16 @@ class NYTimes(BasicNewsRecipe):
self.log("\nFailed to login") self.log("\nFailed to login")
return br return br
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
return self.index_to_soup(url, raw=True)
def get_cover_url(self): def get_cover_url(self):
cover = None cover = None
st = time.localtime() st = time.localtime()
@ -391,14 +406,6 @@ class NYTimes(BasicNewsRecipe):
return ans return ans
def preprocess_html(self, soup): def preprocess_html(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url)
return self.strip_anchors(soup) return self.strip_anchors(soup)
def postprocess_html(self,soup, True): def postprocess_html(self,soup, True):
@ -454,8 +461,10 @@ class NYTimes(BasicNewsRecipe):
if mp_off >= 0: if mp_off >= 0:
c = c[:mp_off] c = c[:mp_off]
emTag.insert(0, c) emTag.insert(0, c)
hrTag = Tag(soup, 'hr') #hrTag = Tag(soup, 'hr')
#hrTag['style'] = "margin-top:0em;margin-bottom:0em" #hrTag['class'] = 'caption_divider'
hrTag = Tag(soup, 'div')
hrTag['class'] = 'divider'
emTag.insert(1, hrTag) emTag.insert(1, hrTag)
caption.replaceWith(emTag) caption.replaceWith(emTag)

View File

@ -13,13 +13,14 @@ Story
import re, string, time import re, string, time
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag
class NYTimes(BasicNewsRecipe): class NYTimes(BasicNewsRecipe):
title = 'The New York Times' title = 'The New York Times'
__author__ = 'GRiker' __author__ = 'GRiker'
language = 'en' language = 'en'
requires_version = (0, 7, 5)
description = 'Daily news from the New York Times (subscription version)' description = 'Daily news from the New York Times (subscription version)'
allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials', allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
@ -65,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule', 'doubleRule',
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module',
'icon enlargeThis', 'icon enlargeThis',
'leftNavTabs', 'leftNavTabs',
'module box nav', 'module box nav',
@ -74,6 +76,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule', 'relatedSearchesModule',
'side_tool', 'side_tool',
'singleAd', 'singleAd',
'subNavigation clearfix',
'subNavigation tabContent active', 'subNavigation tabContent active',
'subNavigation tabContent active clearfix', 'subNavigation tabContent active clearfix',
]}), ]}),
@ -96,6 +99,7 @@ class NYTimes(BasicNewsRecipe):
'navigation', 'navigation',
'portfolioInline', 'portfolioInline',
'relatedArticles', 'relatedArticles',
'respond',
'side_search', 'side_search',
'side_index', 'side_index',
'side_tool', 'side_tool',
@ -103,6 +107,7 @@ class NYTimes(BasicNewsRecipe):
]), ]),
dict(name=['script', 'noscript', 'style'])] dict(name=['script', 'noscript', 'style'])]
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
no_stylesheets = True no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \ extra_css = '.headline {text-align: left;}\n \
.byline {font-family: monospace; \ .byline {font-family: monospace; \
@ -158,7 +163,7 @@ class NYTimes(BasicNewsRecipe):
return cover return cover
def get_masthead_title(self): def get_masthead_title(self):
return 'NYTimes GR Version' return self.title
def dump_ans(self, ans): def dump_ans(self, ans):
total_article_count = 0 total_article_count = 0
@ -279,15 +284,17 @@ class NYTimes(BasicNewsRecipe):
self.dump_ans(ans) self.dump_ans(ans)
return ans return ans
def preprocess_html(self, soup): def skip_ad_pages(self, soup):
# Skip ad pages served before actual article # Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'}) skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None: if skip_tag is not None:
self.log.error("Found forwarding link: %s" % skip_tag.parent['href']) self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all' url += '?pagewanted=all'
self.log.error("Skipping ad to article at '%s'" % url) self.log.warn("Skipping ad to article at '%s'" % url)
soup = self.index_to_soup(url) return self.index_to_soup(url, raw=True)
def preprocess_html(self, soup):
return self.strip_anchors(soup) return self.strip_anchors(soup)
def postprocess_html(self,soup, True): def postprocess_html(self,soup, True):
@ -329,7 +336,7 @@ class NYTimes(BasicNewsRecipe):
self.log(">>> No class:'columnGroup first' found <<<") self.log(">>> No class:'columnGroup first' found <<<")
# Change class="kicker" to <h3> # Change class="kicker" to <h3>
kicker = soup.find(True, {'class':'kicker'}) kicker = soup.find(True, {'class':'kicker'})
if kicker and kicker.contents[0]: if kicker and kicker.contents and kicker.contents[0]:
h3Tag = Tag(soup, "h3") h3Tag = Tag(soup, "h3")
h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker, h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
use_alt=False))) use_alt=False)))
@ -344,8 +351,10 @@ class NYTimes(BasicNewsRecipe):
if mp_off >= 0: if mp_off >= 0:
c = c[:mp_off] c = c[:mp_off]
emTag.insert(0, c) emTag.insert(0, c)
hrTag = Tag(soup, 'hr') #hrTag = Tag(soup, 'hr')
#hrTag['style'] = "margin-top:0em;margin-bottom:0em" #hrTag['class'] = 'caption_divider'
hrTag = Tag(soup, 'div')
hrTag['class'] = 'divider'
emTag.insert(1, hrTag) emTag.insert(1, hrTag)
caption.replaceWith(emTag) caption.replaceWith(emTag)
@ -413,12 +422,11 @@ class NYTimes(BasicNewsRecipe):
return soup return soup
def postprocess_book(self, oeb, opts, log) : def populate_article_metadata(self,article,soup,first):
print "\npostprocess_book()\n" '''
Extract author and description from article, add to article metadata
def extract_byline(href) : '''
# <meta name="byline" content= def extract_author(soup):
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
byline = soup.find('meta',attrs={'name':['byl','CLMST']}) byline = soup.find('meta',attrs={'name':['byl','CLMST']})
if byline : if byline :
author = byline['content'] author = byline['content']
@ -428,50 +436,34 @@ class NYTimes(BasicNewsRecipe):
if byline: if byline:
author = byline.renderContents() author = byline.renderContents()
else: else:
print "couldn't find byline in %s" % href
print soup.prettify() print soup.prettify()
return None return None
# Kill commas - Kindle switches to '&' return author
return re.sub(',','',author)
def extract_description(href) : def extract_description(soup):
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
description = soup.find('meta',attrs={'name':['description','description ']}) description = soup.find('meta',attrs={'name':['description','description ']})
if description : if description :
# print repr(description['content'])
# print self.massageNCXText(description['content'])
return self.massageNCXText(description['content']) return self.massageNCXText(description['content'])
else: else:
# Take first paragraph of article # Take first paragraph of article
articleBody = soup.find('div',attrs={'id':'articleBody'}) articlebody = soup.find('div',attrs={'id':'articlebody'})
if not articleBody: if not articlebody:
# Try again with class instead of id # Try again with class instead of id
articleBody = soup.find('div',attrs={'class':'articleBody'}) articlebody = soup.find('div',attrs={'class':'articlebody'})
if not articleBody: if not articlebody:
print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:' print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
print soup.prettify() print soup.prettify()
return None return None
paras = articleBody.findAll('p') paras = articlebody.findAll('p')
for p in paras: for p in paras:
if p.renderContents() > '' : if p.renderContents() > '' :
return self.massageNCXText(self.tag_to_string(p,use_alt=False)) return self.massageNCXText(self.tag_to_string(p,use_alt=False))
return None return None
# Method entry point here if not article.author:
# Single section toc looks different than multi-section tocs article.author = extract_author(soup)
if oeb.toc.depth() == 2 : if not article.summary:
for article in oeb.toc : article.summary = article.text_summary = extract_description(soup)
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href).decode('utf-8')
elif oeb.toc.depth() == 3 :
for section in oeb.toc :
for article in section :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)
def strip_anchors(self,soup): def strip_anchors(self,soup):
paras = soup.findAll(True) paras = soup.findAll(True)

View File

@ -28,7 +28,7 @@ class OldNewThing(BasicNewsRecipe):
} }
remove_attributes = ['width','height'] remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':['postsub','comment']})] keep_only_tags = [dict(attrs={'class':'full-post'})]
remove_tags = [dict(attrs={'class':['post-attributes','post-tags','post-actions']})]
feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')] feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')]

View File

@ -0,0 +1,57 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277129332(BasicNewsRecipe):
title = u'People Daily - China'
oldest_article = 2
max_articles_per_feed = 100
__author__ = 'rty'
publisher = 'people.com.cn'
description = 'People Daily Newspaper'
language = 'zh'
category = 'News, China'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'GB2312'
conversion_options = {'linearize_tables':True}
feeds = [(u'\u56fd\u5185\u65b0\u95fb', u'http://www.people.com.cn/rss/politics.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.people.com.cn/rss/world.xml'),
(u'\u7ecf\u6d4e\u65b0\u95fb', u'http://www.people.com.cn/rss/finance.xml'),
(u'\u4f53\u80b2\u65b0\u95fb', u'http://www.people.com.cn/rss/sports.xml'),
(u'\u53f0\u6e7e\u65b0\u95fb', u'http://www.people.com.cn/rss/haixia.xml')]
keep_only_tags = [
dict(name='div', attrs={'class':'left_content'}),
]
remove_tags = [
dict(name='table', attrs={'class':'title'}),
]
remove_tags_after = [
dict(name='table', attrs={'class':'bianji'}),
]
def append_page(self, soup, appendtag, position):
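# Stitch multi-page articles together by following the "next page" pager and appending each page's body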
pager = soup.find('img',attrs={'src':'/img/next_b.gif'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':'left_content'})
#for it in texttag.findAll(style=True):
# del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
mtag = '<meta http-equiv="content-type" content="text/html;charset=GB2312" />\n<meta http-equiv="content-language" content="utf-8" />'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
self.append_page(soup, soup.body, 3)
#pager = soup.find('a',attrs={'class':'ab12'})
#if pager:
# pager.extract()
return soup

View File

@ -1,39 +1,44 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class PsychologyToday(BasicNewsRecipe): class AdvancedUserRecipe1275708473(BasicNewsRecipe):
title = u'Psychology Today' title = u'Psychology Today'
language = 'en' __author__ = 'rty'
__author__ = 'Krittika Goyal' publisher = u'www.psychologytoday.com'
oldest_article = 1 #days category = u'Psychology'
max_articles_per_feed = 25 max_articles_per_feed = 100
#encoding = 'latin1' remove_javascript = True
use_embedded_content = False
remove_stylesheets = True no_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'}) language = 'en'
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'}) temp_files = []
articles_are_obfuscated = True
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name='div', attrs={'class':['print-source_url','field-items','print-footer']}),
dict(name='div', attrs={'class':['pt-box-title', 'pt-box-content', 'blog-entry-footer', 'item-list', 'article-sub-meta']}), dict(name='span', attrs={'class':'print-footnote'}),
dict(name='div', attrs={'id':['block-td_search_160', 'block-cam_search_160']}), ]
#dict(name='ul', attrs={'class':'article-tools'}), remove_tags_before = dict(name='h1', attrs={'class':'print-title'})
#dict(name='ul', attrs={'class':'articleTools'}), remove_tags_after = dict(name='div', attrs={'class':['field-items','print-footer']})
]
feeds = [ feeds = [(u'Contents', u'http://www.psychologytoday.com/articles/index.rss')]
('PSY TODAY',
'http://www.psychologytoday.com/articles/index.rss'),
]
def preprocess_html(self, soup): def get_article_url(self, article):
story = soup.find(name='div', attrs={'id':'contentColumn'}) return article.get('link', None)
#td = heading.findParent(name='td')
#td.extract() def get_obfuscated_article(self, url):
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>') br = self.get_browser()
body = soup.find(name='body') br.open(url)
body.insert(0, story) response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
for x in soup.findAll(name='p', text=lambda x:x and '--&gt;' in x): html = response.read()
p = x.findParent('p') self.temp_files.append(PersistentTemporaryFile('_fa.html'))
if p is not None: self.temp_files[-1].write(html)
p.extract() self.temp_files[-1].close()
return soup return self.temp_files[-1].name
def get_cover_url(self):
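# Use the magazine-cover image from the magazine index page as the cover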
index = 'http://www.psychologytoday.com/magazine/'
soup = self.index_to_soup(index)
for image in soup.findAll('img',{ "class" : "imagefield imagefield-field_magazine_cover" }):
return image['src'] + '.jpg'
return None

View File

@ -0,0 +1,79 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1278063072(BasicNewsRecipe):
title = u'Singtao Daily - Canada'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'rty'
description = 'Toronto Canada Chinese Newspaper'
publisher = 'news.singtao.ca'
category = 'Chinese, News, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://news.singtao.ca/i/site_2009/logo.jpg'
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\
body {text-align: justify; margin-right: 8pt; font-family: 'DroidFont', serif;}\
h1 {font-family: 'DroidFont', serif;}\
.articledescription {font-family: 'DroidFont', serif;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':['title','storybody']}),
dict(name='div', attrs={'class':'content'})
]
def parse_index(self):
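# Build the sections by scraping the fixed list of section index pages below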
feeds = []
for title, url in [
('Editorial',
'http://news.singtao.ca/toronto/editorial.html'),
('Toronto \xe5\x9f\x8e\xe5\xb8\x82/\xe7\xa4\xbe\xe5\x8d\x80'.decode('utf-8'),
'http://news.singtao.ca/toronto/city.html'),
('Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8'),
'http://news.singtao.ca/toronto/canada.html'),
('Entertainment',
'http://news.singtao.ca/toronto/entertainment.html'),
('World',
'http://news.singtao.ca/toronto/world.html'),
('Finance \xe5\x9c\x8b\xe9\x9a\x9b\xe8\xb2\xa1\xe7\xb6\x93'.decode('utf-8'),
'http://news.singtao.ca/toronto/finance.html'),
('Sports', 'http://news.singtao.ca/toronto/sports.html'),
]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
return feeds
def parse_section(self, url):
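# Collect the article links from a section's news list, making relative URLs absolute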
soup = self.index_to_soup(url)
div = soup.find(attrs={'class': ['newslist paddingL10T10','newslist3 paddingL10T10']})
#date = div.find(attrs={'class': 'underlineBLK'})
current_articles = []
for li in div.findAll('li'):
a = li.find('a', href = True)
if a is None:
continue
title = self.tag_to_string(a)
url = a.get('href', False)
if not url or not title:
continue
if url.startswith('/'):
url = 'http://news.singtao.ca'+url
# self.log('\ \ Found article:', title)
# self.log('\ \ \ ', url)
current_articles.append({'title': title, 'url': url, 'description':''})
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278049615(BasicNewsRecipe):
title = u'Statesman'
publisher = 'http://www.statesman.com/'
description = 'Austin Texas Daily Newspaper'
category = 'News, Austin, Texas'
__author__ = 'rty'
oldest_article = 3
max_articles_per_feed = 100
feeds = [(u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
(u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
(u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
(u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
(u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
]
masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
#temp_files = []
#articles_are_obfuscated = True
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
remove_tags = [
dict(name='div', attrs={'id':'cxArticleOptions'}),
]
keep_only_tags = [
dict(name='div', attrs={'class':'cxArticleHeader'}),
dict(name='div', attrs={'id':'cxArticleBodyText'}),
]

View File

@ -0,0 +1,59 @@
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1276486274(BasicNewsRecipe):
title = u'Today Online - Singapore'
publisher = 'MediaCorp Press Ltd - Singapore'
__author__ = 'rty'
category = 'news, Singapore'
oldest_article = 7
max_articles_per_feed = 100
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en_SG'
temp_files = []
articles_are_obfuscated = True
masthead_url = 'http://www.todayonline.com/App_Themes/Default/images/icons/TodayOnlineLogo.gif'
conversion_options = {'linearize_tables':True}
extra_css = '''
.author{font-style: italic; font-size: small}
.date{font-style: italic; font-size: small}
.Headline{font-weight: bold; font-size: xx-large}
.headerStrap{font-weight: bold; font-size: x-large; font-style: italic}
.bodyText{font-size: 4px;font-family: Times New Roman;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':['fullPrintBodyHolder']})
]
remove_tags_after = [ dict(name='div', attrs={'class':'button'})]
remove_tags = [
dict(name='div', attrs={'class':['url','button']})
]
feeds = [
(u'Singapore', u'http://www.todayonline.com/RSS/Singapore'),
(u'Hot News', u'http://www.todayonline.com/RSS/Hotnews'),
(u'Today Online', u'http://www.todayonline.com/RSS/Todayonline'),
(u'Voices', u'http://www.todayonline.com/RSS/Voices'),
(u'Commentary', u'http://www.todayonline.com/RSS/Commentary'),
(u'World', u'http://www.todayonline.com/RSS/World'),
(u'Business', u'http://www.todayonline.com/RSS/Business'),
(u'Column', u'http://www.todayonline.com/RSS/Columns'),
]
def get_obfuscated_article(self, url):
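# Fetch the print-friendly copy of each article (the /Print/ link) and cache it in a temp file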
br = self.get_browser()
br.open(url)
response = br.follow_link(url_regex = r'/Print/', nr = 0)
html = response.read()
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,21 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class TimesOfIndia(BasicNewsRecipe): class TimesOfIndia(BasicNewsRecipe):
title = u'Times of India' title = u'Times of India'
language = 'en_IN' language = 'en_IN'
__author__ = 'Krittika Goyal' __author__ = 'Kovid Goyal'
oldest_article = 1 #days oldest_article = 1 #days
max_articles_per_feed = 25 max_articles_per_feed = 25
remove_stylesheets = True no_stylesheets = True
keep_only_tags = [dict(attrs={'class':'prttabl'})]
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(style=lambda x: x and 'float' in x)
dict(name='td', attrs={'class':'newptool1'}),
dict(name='div', attrs={'id':'newptool'}),
dict(name='ul', attrs={'class':'newtabcontent_tabs_new'}),
dict(name='b', text='Topics'),
dict(name='span', text=':'),
] ]
feeds = [ feeds = [
@ -42,13 +37,8 @@ class TimesOfIndia(BasicNewsRecipe):
('Most Read', ('Most Read',
'http://timesofindia.indiatimes.com/rssfeedmostread.cms') 'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
] ]
def print_version(self, url):
return url + '?prtpage=1'
def preprocess_html(self, soup): def preprocess_html(self, soup):
heading = soup.find(name='h1', attrs={'class':'heading'})
td = heading.findParent(name='td')
td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, td)
td.name = 'div'
return soup return soup

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277647803(BasicNewsRecipe):
title = u'Winnipeg Sun'
__author__ = 'rty'
__version__ = '1.0'
oldest_article = 2
publisher = 'www.winnipegsun.com'
description = 'Winnipeg Newspaper'
category = 'News, Winnipeg, Canada'
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'UTF-8'
remove_javascript = True
use_embedded_content = False
language = 'en_CA'
feeds = [
(u'News', u'http://www.winnipegsun.com/news/rss.xml'),
(u'Columnists', u'http://www.winnipegsun.com/columnists/rss.xml'),
(u'Editorial', u'http://www.winnipegsun.com/comment/editorial/rss.xml'),
(u'Entertainments', u'http://www.winnipegsun.com/entertainment/rss.xml'),
(u'Life', u'http://www.winnipegsun.com/life/rss.xml'),
(u'Money', u'http://www.winnipegsun.com/money/rss.xml')
]
keep_only_tags = [
dict(name='div', attrs={'id':'article'}),
]
remove_tags = [
dict(name='div', attrs={'class':['leftBox','bottomBox clear']}),
dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
dict(name='div', attrs={'id':'commentsBottom'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bottomBox clear'})
]

View File

@ -3,15 +3,15 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import string
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import copy
# http://online.wsj.com/page/us_in_todays_paper.html # http://online.wsj.com/page/us_in_todays_paper.html
class WallStreetJournal(BasicNewsRecipe): class WallStreetJournal(BasicNewsRecipe):
title = 'The Wall Street Journal (US)' title = 'The Wall Street Journal'
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal, Sujata Raman, and Joshua Oster-Morris'
description = 'News and current affairs' description = 'News and current affairs'
needs_subscription = True needs_subscription = True
language = 'en' language = 'en'
@ -66,7 +66,17 @@ class WallStreetJournal(BasicNewsRecipe):
return soup return soup
def wsj_get_index(self): def wsj_get_index(self):
return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html') return self.index_to_soup('http://online.wsj.com/itp')
def wsj_add_feed(self,feeds,title,url):
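# Fetch one section and append its articles; the What's News pages need their own scraper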
self.log('Found section:', title)
if url.endswith('whatsnews'):
articles = self.wsj_find_wn_articles(url)
else:
articles = self.wsj_find_articles(url)
if articles:
feeds.append((title, articles))
return feeds
def parse_index(self): def parse_index(self):
soup = self.wsj_get_index() soup = self.wsj_get_index()
@ -75,24 +85,72 @@ class WallStreetJournal(BasicNewsRecipe):
if date is not None: if date is not None:
self.timefmt = ' [%s]'%self.tag_to_string(date) self.timefmt = ' [%s]'%self.tag_to_string(date)
sections = {} cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
sec_order = [] if cov is not None:
self.cover_url = cov['href']
feeds = []
div = soup.find('div', attrs={'class':'itpHeader'})
div = div.find('ul', attrs={'class':'tab'})
for a in div.findAll('a', href=lambda x: x and '/itp/' in x):
pageone = a['href'].endswith('pageone')
if pageone:
title = 'Front Section'
url = 'http://online.wsj.com' + a['href']
feeds = self.wsj_add_feed(feeds,title,url)
title = "What's News"
url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url)
else:
title = self.tag_to_string(a)
url = 'http://online.wsj.com' + a['href']
feeds = self.wsj_add_feed(feeds,title,url)
return feeds
def wsj_find_wn_articles(self, url):
soup = self.index_to_soup(url)
articles = []
whats_news = soup.find('div', attrs={'class':lambda x: x and 'whatsNews-simple' in x})
if whats_news is not None:
for a in whats_news.findAll('a', href=lambda x: x and '/article/' in x):
container = a.findParent(['p'])
meta = a.find(attrs={'class':'meta_sectionName'})
if meta is not None:
meta.extract()
title = self.tag_to_string(a).strip()
url = a['href']
desc = ''
if container is not None:
desc = self.tag_to_string(container)
articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
self.log('\tFound WN article:', title)
return articles
def wsj_find_articles(self, url):
soup = self.index_to_soup(url)
whats_news = soup.find('div', attrs={'class':lambda x: x and 'whatsNews-simple' in x})
if whats_news is not None:
whats_news.extract()
articles = []
flavorarea = soup.find('div', attrs={'class':lambda x: x and 'ahed' in x})
if flavorarea is not None:
flavorstory = flavorarea.find('a', href=lambda x: x and x.startswith('/article'))
if flavorstory is not None:
flavorstory['class'] = 'mjLinkItem'
metapage = soup.find('span', attrs={'class':lambda x: x and 'meta_sectionName' in x})
if metapage is not None:
flavorstory.append( copy.copy(metapage) ) #metapage should always be A1 because that should be first on the page
for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True): for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True):
container = a.findParent(['li', 'div']) container = a.findParent(['li', 'div'])
if container.name == 'div':
section = 'Page One'
else:
section = ''
sec = container.find('a', href=lambda x: x and '/search?' in x)
if sec is not None:
section = self.tag_to_string(sec).strip()
if not section:
h = container.find(['h1','h2','h3','h4','h5','h6'])
section = self.tag_to_string(h)
section = string.capitalize(section).replace('U.s.', 'U.S.')
if section not in sections:
sections[section] = []
sec_order.append(section)
meta = a.find(attrs={'class':'meta_sectionName'}) meta = a.find(attrs={'class':'meta_sectionName'})
if meta is not None: if meta is not None:
meta.extract() meta.extract()
@ -103,30 +161,14 @@ class WallStreetJournal(BasicNewsRecipe):
if p is not None: if p is not None:
desc = self.tag_to_string(p) desc = self.tag_to_string(p)
sections[section].append({'title':title, 'url':url, articles.append({'title':title, 'url':url,
'description':desc, 'date':''}) 'description':desc, 'date':''})
self.log('Found article:', title) self.log('\tFound article:', title)
a.extract() return articles
for a in container.findAll('a', href=lambda x: x and '/article/'
in x):
url = a['href']
if not url.startswith('http:'):
url = 'http://online.wsj.com'+url
title = self.tag_to_string(a).strip()
if not title or title.startswith('['): continue
if title:
sections[section].append({'title':self.tag_to_string(a),
'url':url, 'description':'', 'date':''})
self.log('\tFound related:', title)
feeds = [(sec, sections[sec]) for sec in sec_order]
return feeds
def cleanup(self): def cleanup(self):
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com') self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
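Note on the reworked index logic above: wsj_add_feed() accumulates (section title, article list) pairs, which is the structure parse_index() must hand back to calibre. A minimal sketch of that return value, with purely illustrative titles and URLs:
feeds = [
    ('Front Section', [
        {'title': 'Example headline',       # illustrative values only
         'url': 'http://online.wsj.com/article/SB0000000000.html',
         'description': 'Summary text taken from the containing paragraph',
         'date': ''},
    ]),
]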
View File
@ -15,22 +15,22 @@ class ZAOBAO(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
recursions = 1 recursions = 1
language = 'zh' language = 'zh'
encoding = 'gbk' encoding = 'gbk'
# multithreaded_fetch = True # multithreaded_fetch = True
keep_only_tags = [ keep_only_tags = [
dict(name='table', attrs={'cellpadding':'9'}), dict(name='td', attrs={'class':'text'}),
dict(name='table', attrs={'class':'cont'}),
dict(name='div', attrs={'id':'content'}),
dict(name='span', attrs={'class':'page'}), dict(name='span', attrs={'class':'page'}),
dict(name='div', attrs={'id':'content'})
] ]
remove_tags = [ remove_tags = [
dict(name='table', attrs={'cellspacing':'9'}), dict(name='table', attrs={'cellspacing':'9'}),
dict(name='fieldset'),
dict(name='div', attrs={'width':'30%'}),
] ]
extra_css = '\ extra_css = '\n\
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n\ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n\
body{font-family: serif1, serif}\n\ body{font-family: serif1, serif}\n\
.article_description{font-family: serif1, serif}\n\ .article_description{font-family: serif1, serif}\n\
@ -41,7 +41,10 @@ class ZAOBAO(BasicNewsRecipe):
.article {font-size:medium}\n\ .article {font-size:medium}\n\
.navbar {font-size: small}\n\ .navbar {font-size: small}\n\
.feed{font-size: medium}\n\ .feed{font-size: medium}\n\
.small{font-size: small; padding-right: 8%}\n' .small{font-size: small;padding-right: 8pt}\n\
.text{padding-right: 8pt}\n\
p{text-indent: 0cm}\n\
div#content{padding-right: 10pt}'
INDEXES = [ INDEXES = [
(u'\u65b0\u95fb\u56fe\u7247', u'http://www.zaobao.com/photoweb/photoweb_idx.shtml') (u'\u65b0\u95fb\u56fe\u7247', u'http://www.zaobao.com/photoweb/photoweb_idx.shtml')
@ -51,27 +54,35 @@ class ZAOBAO(BasicNewsRecipe):
DESC_SENSE = u'\u8054\u5408\u65e9\u62a5\u7f51' DESC_SENSE = u'\u8054\u5408\u65e9\u62a5\u7f51'
feeds = [ feeds = [
(u'\u5373\u65f6\u62a5\u9053', u'http://realtime.zaobao.com/news.xml'), (u'\u5373\u65f6\u62a5\u9053', u'http://realtime.zaobao.com/news.xml'),
(u'\u4e2d\u56fd\u65b0\u95fb', u'http://www.zaobao.com/zg/zg.xml'), (u'\u4e2d\u56fd\u65b0\u95fb', u'http://www.zaobao.com/zg/zg.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.zaobao.com/gj/gj.xml'), (u'\u56fd\u9645\u65b0\u95fb', u'http://www.zaobao.com/gj/gj.xml'),
(u'\u4e16\u754c\u62a5\u520a\u6587\u8403', u'http://www.zaobao.com/wencui/wencui.xml'), (u'\u4e16\u754c\u62a5\u520a\u6587\u8403', u'http://www.zaobao.com/wencui/wencui.xml'),
(u'\u4e1c\u5357\u4e9a\u65b0\u95fb', u'http://www.zaobao.com/yx/yx.xml'), (u'\u4e1c\u5357\u4e9a\u65b0\u95fb', u'http://www.zaobao.com/yx/yx.xml'),
(u'\u65b0\u52a0\u5761\u65b0\u95fb', u'http://www.zaobao.com/sp/sp.xml'), (u'\u65b0\u52a0\u5761\u65b0\u95fb', u'http://www.zaobao.com/sp/sp.xml'),
(u'\u4eca\u65e5\u89c2\u70b9', u'http://www.zaobao.com/yl/yl.xml'), (u'\u4eca\u65e5\u89c2\u70b9', u'http://www.zaobao.com/yl/yl.xml'),
(u'\u4e2d\u56fd\u8d22\u7ecf', u'http://www.zaobao.com/cz/cz.xml'), (u'\u4e2d\u56fd\u8d22\u7ecf', u'http://www.zaobao.com/cz/cz.xml'),
(u'\u72ee\u57ce\u8d22\u7ecf', u'http://www.zaobao.com/cs/cs.xml'), (u'\u72ee\u57ce\u8d22\u7ecf', u'http://www.zaobao.com/cs/cs.xml'),
(u'\u5168\u7403\u8d22\u7ecf', u'http://www.zaobao.com/cg/cg.xml'), (u'\u5168\u7403\u8d22\u7ecf', u'http://www.zaobao.com/cg/cg.xml'),
(u'\u65e9\u62a5\u4f53\u80b2', u'http://www.zaobao.com/ty/ty.xml'), (u'\u65e9\u62a5\u4f53\u80b2', u'http://www.zaobao.com/ty/ty.xml'),
(u'\u65e9\u62a5\u526f\u520a', u'http://www.zaobao.com/fk/fk.xml'), (u'\u65e9\u62a5\u526f\u520a', u'http://www.zaobao.com/fk/fk.xml'),
] ]
def preprocess_html(self, soup):
for tag in soup.findAll(name='a'):
if tag.has_key('href'):
tag_url = tag['href']
if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
del tag['href']
return soup
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']): for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div' tag.name = 'div'
return soup return soup
def parse_feeds(self): def parse_feeds(self):
self.log.debug('ZAOBAO overrode parse_feeds()') self.log_debug(_('ZAOBAO overrode parse_feeds()'))
parsed_feeds = BasicNewsRecipe.parse_feeds(self) parsed_feeds = BasicNewsRecipe.parse_feeds(self)
for id, obj in enumerate(self.INDEXES): for id, obj in enumerate(self.INDEXES):
@ -88,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
a_title = self.tag_to_string(a) a_title = self.tag_to_string(a)
date = '' date = ''
description = '' description = ''
self.log.debug('adding %s at %s'%(a_title,a_url)) self.log_debug(_('adding %s at %s')%(a_title,a_url))
articles.append({ articles.append({
'title':a_title, 'title':a_title,
'date':date, 'date':date,
@ -97,26 +108,25 @@ class ZAOBAO(BasicNewsRecipe):
}) })
pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article, pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed, max_articles_per_feed=self.max_articles_per_feed)
log=self.log)
self.log.debug('adding %s to feed'%(title)) self.log_debug(_('adding %s to feed')%(title))
for feed in pfeeds: for feed in pfeeds:
self.log.debug('adding feed: %s'%(feed.title)) self.log_debug(_('adding feed: %s')%(feed.title))
feed.description = self.DESC_SENSE feed.description = self.DESC_SENSE
parsed_feeds.append(feed) parsed_feeds.append(feed)
for a, article in enumerate(feed): for a, article in enumerate(feed):
self.log.debug('added article %s from %s'%(article.title, article.url)) self.log_debug(_('added article %s from %s')%(article.title, article.url))
self.log.debug('added feed %s'%(feed.title)) self.log_debug(_('added feed %s')%(feed.title))
for i, feed in enumerate(parsed_feeds): for i, feed in enumerate(parsed_feeds):
# workaround a strange problem: Sometimes the xml encoding is not applied correctly by parse() # workaround a strange problem: Sometimes the xml encoding is not applied correctly by parse()
weired_encoding_detected = False weired_encoding_detected = False
if not isinstance(feed.description, unicode) and self.encoding and feed.description: if not isinstance(feed.description, unicode) and self.encoding and feed.description:
self.log.debug('Feed %s is not encoded correctly, manually replace it'%(feed.title)) self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
feed.description = feed.description.decode(self.encoding, 'replace') feed.description = feed.description.decode(self.encoding, 'replace')
elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description: elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
self.log.debug('Feed %s is strangely encoded, manually redo all'%(feed.title)) self.log_debug(_('Feed %s is weirdly encoded, manually redo all')%(feed.title))
feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace') feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
weired_encoding_detected = True weired_encoding_detected = True
@ -138,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace') article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')
if article.title == "Untitled article": if article.title == "Untitled article":
self.log.debug('Removing empty article %s from %s'%(article.title, article.url)) self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
# remove the article # remove the article
feed.articles[a:a+1] = [] feed.articles[a:a+1] = []
return parsed_feeds return parsed_feeds
View File
@ -406,3 +406,8 @@ img, object, svg|svg {
width: auto; width: auto;
height: auto; height: auto;
} }
/* These are needed because ADE renders anchors the same as links */
a { text-decoration: inherit; color: inherit; cursor: inherit }
a[href] { text-decoration: underline; color: blue; cursor: pointer }
View File
@ -40,13 +40,14 @@ class LinuxFreeze(Command):
'/usr/bin/pdftohtml', '/usr/bin/pdftohtml',
'/usr/lib/libwmflite-0.2.so.7', '/usr/lib/libwmflite-0.2.so.7',
'/usr/lib/liblcms.so.1', '/usr/lib/liblcms.so.1',
'/usr/lib/libstlport.so.5.1',
'/tmp/calibre-mount-helper', '/tmp/calibre-mount-helper',
'/usr/lib/libunrar.so', '/usr/lib/libunrar.so',
'/usr/lib/libchm.so.0', '/usr/lib/libchm.so.0',
'/usr/lib/libsqlite3.so.0', '/usr/lib/libsqlite3.so.0',
'/usr/lib/libsqlite3.so.0', '/usr/lib/libsqlite3.so.0',
'/usr/lib/libmng.so.1', '/usr/lib/libmng.so.1',
'/usr/lib/libpodofo.so.0.6.99', '/usr/lib/libpodofo.so.0.8.1',
'/lib/libz.so.1', '/lib/libz.so.1',
'/lib/libuuid.so.1', '/lib/libuuid.so.1',
'/usr/lib/libtiff.so.3', '/usr/lib/libtiff.so.3',
View File
@ -265,6 +265,9 @@ class Py2App(object):
@flush @flush
def get_local_dependencies(self, path_to_lib): def get_local_dependencies(self, path_to_lib):
for x in self.get_dependencies(path_to_lib): for x in self.get_dependencies(path_to_lib):
if x.startswith('libpodofo'):
yield x, x
continue
for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/', for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/',
'/opt/local/lib/', '/opt/local/lib/',
'/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'): '/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'):
@ -397,7 +400,7 @@ class Py2App(object):
@flush @flush
def add_podofo(self): def add_podofo(self):
info('\nAdding PoDoFo') info('\nAdding PoDoFo')
pdf = join(SW, 'lib', 'libpodofo.0.6.99.dylib') pdf = join(SW, 'lib', 'libpodofo.0.8.1.dylib')
self.install_dylib(pdf) self.install_dylib(pdf)
@flush @flush
View File
@ -162,9 +162,50 @@ SET(WANT_LIB64 FALSE)
SET(PODOFO_BUILD_SHARED TRUE) SET(PODOFO_BUILD_SHARED TRUE)
SET(PODOFO_BUILD_STATIC FALSE) SET(PODOFO_BUILD_STATIC FALSE)
cp build/podofo-0.7.0/build/src/Release/podofo.dll bin/ cp build/podofo/build/src/Release/podofo.dll bin/
cp build/podofo-0.7.0/build/src/Release/podofo.lib lib/ cp build/podofo/build/src/Release/podofo.lib lib/
cp build/podofo-0.7.0/build/src/Release/podofo.exp lib/ cp build/podofo/build/src/Release/podofo.exp lib/
cp build/podofo/build/podofo_config.h include/podofo/
cp -r build/podofo/src/* include/podofo/
The following patch was required to get it to compile:
Index: src/PdfImage.cpp
===================================================================
--- src/PdfImage.cpp (revision 1261)
+++ src/PdfImage.cpp (working copy)
@@ -627,7 +627,7 @@
long lLen = static_cast<long>(pInfo->rowbytes * height);
char* pBuffer = static_cast<char*>(malloc(sizeof(char) * lLen));
- png_bytep pRows[height];
+ png_bytepp pRows = static_cast<png_bytepp>(malloc(sizeof(png_bytep)*height));
for(int y=0; y<height; y++)
{
pRows[y] = reinterpret_cast<png_bytep>(pBuffer + (y * pInfo->rowbytes));
@@ -672,6 +672,7 @@
this->SetImageData( width, height, pInfo->bit_depth, &stream );
free(pBuffer);
+ free(pRows);
}
#endif // PODOFO_HAVE_PNG_LIB
Index: src/PdfFiltersPrivate.cpp
===================================================================
--- src/PdfFiltersPrivate.cpp (revision 1261)
+++ src/PdfFiltersPrivate.cpp (working copy)
@@ -1019,7 +1019,7 @@
/*
* Prepare for input from a memory buffer.
*/
-GLOBAL(void)
+void
jpeg_memory_src (j_decompress_ptr cinfo, const JOCTET * buffer, size_t bufsize)
{
my_src_ptr src;
ImageMagick ImageMagick
-------------- --------------
View File
@ -154,6 +154,10 @@
<CustomAction Id="LaunchApplication" BinaryKey="WixCA" <CustomAction Id="LaunchApplication" BinaryKey="WixCA"
DllEntry="WixShellExec" Impersonate="yes"/> DllEntry="WixShellExec" Impersonate="yes"/>
<InstallUISequence>
<FileCost Suppress="yes" />
</InstallUISequence>
</Product> </Product>
</Wix> </Wix>
View File
@ -30,6 +30,7 @@ mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs') mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml') mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg') mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('text/fb2+xml', '.fb2')
mimetypes.add_type('application/x-sony-bbeb', '.lrf') mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-sony-bbeb', '.lrx') mimetypes.add_type('application/x-sony-bbeb', '.lrx')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx') mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
@ -43,6 +44,7 @@ mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
mimetypes.add_type('application/x-mobipocket-ebook', '.azw') mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
mimetypes.add_type('application/x-cbz', '.cbz') mimetypes.add_type('application/x-cbz', '.cbz')
mimetypes.add_type('application/x-cbr', '.cbr') mimetypes.add_type('application/x-cbr', '.cbr')
mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
mimetypes.add_type('image/wmf', '.wmf') mimetypes.add_type('image/wmf', '.wmf')
guess_type = mimetypes.guess_type guess_type = mimetypes.guess_type
import cssutils import cssutils
@ -340,13 +342,6 @@ def detect_ncpus():
return ans return ans
def launch(path_or_url):
from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QDesktopServices
if os.path.exists(path_or_url):
path_or_url = 'file:'+path_or_url
QDesktopServices.openUrl(QUrl(path_or_url))
relpath = os.path.relpath relpath = os.path.relpath
_spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE) _spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)
def english_sort(x, y): def english_sort(x, y):
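The two new registrations above (.fb2 and .kobo) take effect as soon as this module is imported, so calibre's guess_type wrapper (plain mimetypes.guess_type) resolves them; the file names below are examples only:
from calibre import guess_type
print guess_type('story.fb2')[0]    # 'text/fb2+xml'
print guess_type('book.kobo')[0]    # 'application/x-koboreader-ebook'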
View File
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.7.2' __version__ = '0.7.8'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re
View File
@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
from calibre.constants import numeric_version from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin): class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP' name = 'HTML to ZIP'
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -30,6 +31,7 @@ every time you add an HTML file to the library.\
with TemporaryDirectory('_plugin_html2zip') as tdir: with TemporaryDirectory('_plugin_html2zip') as tdir:
recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)] recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
if self.site_customization and self.site_customization.strip(): if self.site_customization and self.site_customization.strip():
recs.append(['input_encoding', self.site_customization.strip(), recs.append(['input_encoding', self.site_customization.strip(),
OptionRecommendation.HIGH]) OptionRecommendation.HIGH])
@ -81,7 +83,9 @@ class PML2PMLZ(FileTypePlugin):
return of.name return of.name
# }}}
# Metadata reader plugins {{{
class ComicMetadataReader(MetadataReaderPlugin): class ComicMetadataReader(MetadataReaderPlugin):
name = 'Read comic metadata' name = 'Read comic metadata'
@ -319,7 +323,9 @@ class ZipMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype): def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.zip import get_metadata from calibre.ebooks.metadata.zip import get_metadata
return get_metadata(stream) return get_metadata(stream)
# }}}
# Metadata writer plugins {{{
class EPUBMetadataWriter(MetadataWriterPlugin): class EPUBMetadataWriter(MetadataWriterPlugin):
@ -395,6 +401,7 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.topaz import set_metadata from calibre.ebooks.metadata.topaz import set_metadata
set_metadata(stream, mi) set_metadata(stream, mi)
# }}}
from calibre.ebooks.comic.input import ComicInput from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.epub.input import EPUBInput
@ -436,7 +443,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \ from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \ POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
BOOQ, ELONEX, POCKETBOOK301 BOOQ, ELONEX, POCKETBOOK301, MENTOR
from calibre.devices.iliad.driver import ILIAD from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK from calibre.devices.jetbook.driver import JETBOOK
@ -444,7 +451,7 @@ from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK from calibre.devices.nook.driver import NOOK
from calibre.devices.prs505.driver import PRS505 from calibre.devices.prs505.driver import PRS505
from calibre.devices.android.driver import ANDROID, S60 from calibre.devices.android.driver import ANDROID, S60
from calibre.devices.nokia.driver import N770, N810, E71X from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52 from calibre.devices.eslick.driver import ESLICK, EBK52
from calibre.devices.nuut2.driver import NUUT2 from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY from calibre.devices.iriver.driver import IRIVER_STORY
@ -453,7 +460,7 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS
from calibre.devices.sne.driver import SNE from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO from calibre.devices.kobo.driver import KOBO
@ -461,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.library.catalog import CSV_XML, EPUB_MOBI from calibre.library.catalog import CSV_XML, EPUB_MOBI
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI] LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]
plugins += [ plugins += [
ComicInput, ComicInput,
EPUBInput, EPUBInput,
@ -499,7 +509,6 @@ plugins += [
] ]
# Order here matters. The first matched device is the one used. # Order here matters. The first matched device is the one used.
plugins += [ plugins += [
ITUNES,
HANLINV3, HANLINV3,
HANLINV5, HANLINV5,
BLACKBERRY, BLACKBERRY,
@ -520,6 +529,7 @@ plugins += [
S60, S60,
N770, N770,
E71X, E71X,
E52,
N810, N810,
COOL_ER, COOL_ER,
ESLICK, ESLICK,
@ -550,6 +560,10 @@ plugins += [
AZBOOKA, AZBOOKA,
FOLDER_DEVICE_FOR_CONFIG, FOLDER_DEVICE_FOR_CONFIG,
AVANT, AVANT,
MENTOR,
SWEEX,
PDNOVEL,
ITUNES,
] ]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
View File

@ -36,7 +36,7 @@ class Plugin(_Plugin):
self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name) self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num) self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
# Input profiles {{{
class InputProfile(Plugin): class InputProfile(Plugin):
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -218,6 +218,8 @@ input_profiles = [InputProfile, SonyReaderInput, SonyReader300Input,
input_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower())) input_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
# }}}
class OutputProfile(Plugin): class OutputProfile(Plugin):
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -237,11 +239,12 @@ class OutputProfile(Plugin):
# If True the MOBI renderer on the device supports MOBI indexing # If True the MOBI renderer on the device supports MOBI indexing
supports_mobi_indexing = False supports_mobi_indexing = False
# Device supports displaying a nested TOC
supports_nested_toc = True
# If True output should be optimized for a touchscreen interface # If True output should be optimized for a touchscreen interface
touchscreen = False touchscreen = False
touchscreen_news_css = ''
# A list of extra (beyond CSS 2.1) modules supported by the device
# Format is a cssutils profile dictionary (see iPad for example)
extra_css_modules = []
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):
@ -256,8 +259,151 @@ class iPadOutput(OutputProfile):
screen_size = (768, 1024) screen_size = (768, 1024)
comic_screen_size = (768, 1024) comic_screen_size = (768, 1024)
dpi = 132.0 dpi = 132.0
supports_nested_toc = False extra_css_modules = [
{
'name':'webkit',
'props': { '-webkit-border-bottom-left-radius':'{length}',
'-webkit-border-bottom-right-radius':'{length}',
'-webkit-border-top-left-radius':'{length}',
'-webkit-border-top-right-radius':'{length}',
'-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit',
},
'macros': {'border-width': '{length}|medium|thick|thin'}
}
]
touchscreen = True touchscreen = True
# touchscreen_news_css {{{
touchscreen_news_css = u'''
/* hr used in articles */
.article_articles_list {
width:18%;
}
.article_link {
color: #593f29;
font-style: italic;
}
.article_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:32%;
}
.article_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:32%;
}
.article_sections_list {
width:18%;
}
.articles_link {
font-weight: bold;
}
.sections_link {
font-weight: bold;
}
.caption_divider {
border:#ccc 1px solid;
}
.touchscreen_navbar {
background:#c3bab2;
border:#ccc 0px solid;
border-collapse:separate;
border-spacing:1px;
margin-left: 5%;
margin-right: 5%;
width: 90%;
-webkit-border-radius:4px;
}
.touchscreen_navbar td {
background:#fff;
font-family:Helvetica;
font-size:80%;
/* UI touchboxes use 8px padding */
padding: 6px;
text-align:center;
}
.touchscreen_navbar td a:link {
color: #593f29;
text-decoration: none;
}
/* Index formatting */
.publish_date {
text-align:center;
}
.divider {
border-bottom:1em solid white;
border-top:1px solid gray;
}
hr.caption_divider {
border-color:black;
border-style:solid;
border-width:1px;
}
/* Feed summary formatting */
.article_summary {
display:inline-block;
}
.feed {
font-family:sans-serif;
font-weight:bold;
font-size:larger;
}
.feed_link {
font-style: italic;
}
.feed_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:40%;
}
.feed_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:40%;
}
.feed_title {
text-align: center;
font-size: 160%;
}
.feed_up {
font-weight: bold;
width:20%;
}
.summary_headline {
font-weight:bold;
text-align:left;
}
.summary_byline {
text-align:left;
font-family:monospace;
}
.summary_text {
text-align:left;
}
'''
# }}}
class SonyReaderOutput(OutputProfile): class SonyReaderOutput(OutputProfile):
@ -279,6 +425,7 @@ class KoboReaderOutput(OutputProfile):
description = _('This profile is intended for the Kobo Reader.') description = _('This profile is intended for the Kobo Reader.')
screen_size = (590, 775) screen_size = (590, 775)
comic_screen_size = (540, 718)
dpi = 168.451 dpi = 168.451
fbase = 12 fbase = 12
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24] fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
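A hedged sketch of how one entry from the new extra_css_modules list could be registered with cssutils, whose profile registry accepts exactly the (name, properties, macros) triple mirrored by the dictionary keys in the iPad profile above. How the conversion pipeline actually performs this registration is not part of this diff, so treat the call below as illustrative:
import cssutils
webkit = {
    'name': 'webkit',
    'props': {'-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit'},
    'macros': {'border-width': '{length}|medium|thick|thin'},
}
cssutils.profile.addProfile(webkit['name'], webkit['props'], webkit['macros'])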
View File
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.fetch import MetadataSource from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \ from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser, prefs plugin_dir, OptionParser, prefs
from calibre.ebooks.epub.fix import ePubFixer
platform = 'linux' platform = 'linux'
@ -151,13 +152,13 @@ def reread_filetype_plugins():
def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'): def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
occasion = {'import':_on_import, 'preprocess':_on_preprocess, occasion_plugins = {'import':_on_import, 'preprocess':_on_preprocess,
'postprocess':_on_postprocess}[occasion] 'postprocess':_on_postprocess}[occasion]
customization = config['plugin_customization'] customization = config['plugin_customization']
if ft is None: if ft is None:
ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '') ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '')
nfp = path_to_file nfp = path_to_file
for plugin in occasion.get(ft, []): for plugin in occasion_plugins.get(ft, []):
if is_disabled(plugin): if is_disabled(plugin):
continue continue
plugin.site_customization = customization.get(plugin.name, '') plugin.site_customization = customization.get(plugin.name, '')
@ -194,7 +195,6 @@ def plugin_customization(plugin):
# }}} # }}}
# Input/Output profiles {{{ # Input/Output profiles {{{
def input_profiles(): def input_profiles():
for plugin in _initialized_plugins: for plugin in _initialized_plugins:
@ -444,6 +444,14 @@ def device_plugins(): # {{{
yield plugin yield plugin
# }}} # }}}
# epub fixers {{{
def epub_fixers():
for plugin in _initialized_plugins:
if isinstance(plugin, ePubFixer):
if not is_disabled(plugin):
if platform in plugin.supported_platforms:
yield plugin
# }}}
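The epub_fixers() helper added above follows the same pattern as device_plugins(): it yields each enabled ePubFixer plugin supported on the current platform. A trivial illustrative use that only lists the registered fixers (the Unmanifested and Epubcheck plugins added to the builtin plugin list) without running them:
from calibre.customize.ui import epub_fixers
for fixer in epub_fixers():
    print fixer.name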
# Initialize plugins {{{ # Initialize plugins {{{
View File
@ -34,6 +34,12 @@ class ANDROID(USBMS):
# Acer # Acer
0x502 : { 0x3203 : [0x0100]}, 0x502 : { 0x3203 : [0x0100]},
# Dell
0x413c : { 0xb007 : [0x0100]},
# Eken?
0x040d : { 0x0851 : [0x0001]},
} }
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books'] EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -42,11 +48,12 @@ class ANDROID(USBMS):
EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN) EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG'] 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD',
'PROD_GT-I9000'] 'GT-I9000', 'FILE-STOR_GADGET']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'PROD_GT-I9000_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD',
'FILE-STOR_GADGET']
OSX_MAIN_MEM = 'HTC Android Phone Media' OSX_MAIN_MEM = 'HTC Android Phone Media'
File diff suppressed because it is too large
View File
@ -186,6 +186,15 @@ class BOOQ(EB600):
WINDOWS_MAIN_MEM = 'EB600' WINDOWS_MAIN_MEM = 'EB600'
WINDOWS_CARD_A_MEM = 'EB600' WINDOWS_CARD_A_MEM = 'EB600'
class MENTOR(EB600):
name = 'Astak Mentor EB600'
gui_name = 'Mentor'
description = _('Communicate with the Astak Mentor EB600')
FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'pdf', 'txt']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MENTOR'
class ELONEX(EB600): class ELONEX(EB600):
name = 'Elonex 600EB' name = 'Elonex 600EB'
View File
@ -66,7 +66,7 @@ class FOLDER_DEVICE(USBMS):
detected_device=None): detected_device=None):
pass pass
def disconnect_from_folder(self): def unmount_device(self):
self._main_prefix = '' self._main_prefix = ''
self.is_connected = False self.is_connected = False
View File
@ -106,9 +106,11 @@ class BOOX(HANLINV3):
description = _('Communicate with the BOOX eBook reader.') description = _('Communicate with the BOOX eBook reader.')
author = 'Jesus Manuel Marinho Valcarce' author = 'Jesus Manuel Marinho Valcarce'
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
METADATA_CACHE = '.metadata.calibre'
# Ordered list of supported formats # Ordered list of supported formats
FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi', 'prc', 'chm'] FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi',
'prc', 'chm', 'doc']
VENDOR_ID = [0x0525] VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5] PRODUCT_ID = [0xa4a5]
View File
@ -24,7 +24,7 @@ class N516(USBMS):
VENDOR_ID = [0x0525] VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5] PRODUCT_ID = [0xa4a5]
BCD = [0x323, 0x326] BCD = [0x323, 0x326, 0x327]
VENDOR_NAME = 'INGENIC' VENDOR_NAME = 'INGENIC'
WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE' WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE'
View File
@ -59,7 +59,7 @@ class DevicePlugin(Plugin):
return cls.__name__ return cls.__name__
return cls.name return cls.name
# Device detection {{{
def test_bcd_windows(self, device_id, bcd): def test_bcd_windows(self, device_id, bcd):
if bcd is None or len(bcd) == 0: if bcd is None or len(bcd) == 0:
return True return True
@ -152,6 +152,7 @@ class DevicePlugin(Plugin):
return True, dev return True, dev
return False, None return False, None
# }}}
def reset(self, key='-1', log_packets=False, report_progress=None, def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None) : detected_device=None) :
@ -372,14 +373,12 @@ class DevicePlugin(Plugin):
@classmethod @classmethod
def settings(cls): def settings(cls):
''' '''
Should return an opts object. The opts object should have one attribute Should return an opts object. The opts object should have at least one attribute
`format_map` which is an ordered list of formats for the device. `format_map` which is an ordered list of formats for the device.
''' '''
raise NotImplementedError() raise NotImplementedError()
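A minimal sketch of the relaxed settings() contract documented above (an opts object exposing at least format_map); the class names here are hypothetical, and real drivers normally inherit a full implementation from the USBMS/DeviceConfig machinery rather than writing one by hand:
class _Opts(object):                    # hypothetical stand-in for the opts object
    def __init__(self, format_map):
        self.format_map = format_map    # ordered list of formats, most preferred first
class ExampleDevice(DevicePlugin):      # hypothetical driver
    FORMATS = ['epub', 'pdf', 'txt']
    @classmethod
    def settings(cls):
        return _Opts(list(cls.FORMATS))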
class BookList(list): class BookList(list):
''' '''
A list of books. Each Book object must have the fields: A list of books. Each Book object must have the fields:
View File
@ -213,7 +213,7 @@ class KINDLE_DX(KINDLE2):
PRODUCT_ID = [0x0003] PRODUCT_ID = [0x0003]
BCD = [0x0100] BCD = [0x0100]
class Bookmark(): class Bookmark(): # {{{
''' '''
A simple class fetching bookmark data A simple class fetching bookmark data
Kindle-specific Kindle-specific
@ -429,6 +429,7 @@ class Bookmark():
entries, = unpack('>I', data[9:13]) entries, = unpack('>I', data[9:13])
current_entry = 0 current_entry = 0
e_base = 0x0d e_base = 0x0d
self.pdf_page_offset = 0
while current_entry < entries: while current_entry < entries:
''' '''
location, = unpack('>I', data[e_base+2:e_base+6]) location, = unpack('>I', data[e_base+2:e_base+6])
@ -516,3 +517,6 @@ class Bookmark():
else: else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension print "unsupported bookmark_extension: %s" % self.bookmark_extension
# }}}
View File
@ -0,0 +1,116 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com>'
'''
'''
import os
import re
import time
from calibre.ebooks.metadata import MetaInformation
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring
class Book(MetaInformation):
BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
'title_sort', 'comments', 'category', 'publisher', 'series',
'series_index', 'rating', 'isbn', 'language', 'application_id',
'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
'uuid',
]
def __init__(self, prefix, lpath, title, authors, mime, date, ContentType, thumbnail_name, other=None):
MetaInformation.__init__(self, '')
self.device_collections = []
self._new_book = False
self.path = os.path.join(prefix, lpath)
if os.sep == '\\':
self.path = self.path.replace('/', '\\')
self.lpath = lpath.replace('\\', '/')
else:
self.lpath = lpath
self.title = title
if not authors:
self.authors = ['']
else:
self.authors = [authors]
self.mime = mime
try:
self.size = os.path.getsize(self.path)
except OSError:
self.size = 0
try:
if ContentType == '6':
self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
else:
self.datetime = time.gmtime(os.path.getctime(self.path))
except:
self.datetime = time.gmtime()
if thumbnail_name is not None:
self.thumbnail = ImageWrapper(thumbnail_name)
self.tags = []
if other:
self.smart_update(other)
def __eq__(self, other):
return self.path == getattr(other, 'path', None)
@dynamic_property
def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
match = re.search(r'_(\d+)$', self.lpath.rpartition('.')[0])
if match:
return int(match.group(1))
return None
return property(fget=fget, doc=doc)
@dynamic_property
def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
return property(doc=doc, fget=fget)
@dynamic_property
def thumbnail(self):
return None
def smart_update(self, other):
'''
Merge the information in C{other} into self. In case of conflicts, the information
in C{other} takes precedence, unless the information in C{other} is NULL.
'''
MetaInformation.smart_update(self, other)
for attr in self.BOOK_ATTRS:
if hasattr(other, attr):
val = getattr(other, attr, None)
setattr(self, attr, val)
def to_json(self):
json = {}
for attr in self.JSON_ATTRS:
val = getattr(self, attr)
if isbytestring(val):
enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
val = val.decode(enc, 'replace')
elif isinstance(val, (list, tuple)):
val = [x.decode(preferred_encoding, 'replace') if
isbytestring(x) else x for x in val]
json[attr] = val
return json
class ImageWrapper(object):
def __init__(self, image_path):
self.image_path = image_path
View File
@ -2,17 +2,26 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os
import sqlite3 as sqlite
from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book
from calibre.devices.kobo.books import ImageWrapper
from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
from calibre import prints
class KOBO(USBMS): class KOBO(USBMS):
name = 'Kobo Reader Device Interface' name = 'Kobo Reader Device Interface'
gui_name = 'Kobo Reader' gui_name = 'Kobo Reader'
description = _('Communicate with the Kobo Reader') description = _('Communicate with the Kobo Reader')
author = 'Kovid Goyal' author = 'Timothy Legge and Kovid Goyal'
version = (1, 0, 4)
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
@ -29,3 +38,320 @@ class KOBO(USBMS):
EBOOK_DIR_MAIN = '' EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True SUPPORTS_SUB_DIRS = True
def initialize(self):
USBMS.initialize(self)
self.book_class = Book
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
dummy_bl = BookList(None, None, None)
if oncard == 'carda' and not self._card_a_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
elif oncard == 'cardb' and not self._card_b_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
elif oncard and oncard != 'carda' and oncard != 'cardb':
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
prefix = self._card_a_prefix if oncard == 'carda' else \
self._card_b_prefix if oncard == 'cardb' \
else self._main_prefix
# get the metadata cache
bl = self.booklist_class(oncard, prefix, self.settings)
need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
# make a dict cache of paths so the lookup in the loop below is faster.
bl_cache = {}
for idx,b in enumerate(bl):
bl_cache[b.lpath] = idx
def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID):
changed = False
# if path_to_ext(path) in self.FORMATS:
try:
lpath = path.partition(self.normalize_path(prefix))[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
lpath = lpath.replace('\\', '/')
# print "LPATH: " + lpath
path = self.normalize_path(path)
# print "Normalized FileName: " + path
idx = bl_cache.get(lpath, None)
if idx is not None:
if ImageID is not None:
imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
#print "Image name Normalized: " + imagename
if imagename is not None:
bl[idx].thumbnail = ImageWrapper(imagename)
bl_cache[lpath] = None
if ContentType != '6':
if self.update_metadata_item(bl[idx]):
# print 'update_metadata_item returned true'
changed = True
else:
book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
# print 'Update booklist'
if bl.add_book(book, replace_metadata=False):
changed = True
except: # Probably a path encoding error
import traceback
traceback.print_exc()
return changed
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
cursor = connection.cursor()
#query = 'select count(distinct volumeId) from volume_shortcovers'
#cursor.execute(query)
#for row in (cursor):
# numrows = row[0]
#cursor.close()
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID from content where BookID is Null'
cursor.execute (query)
changed = False
for i, row in enumerate(cursor):
# self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
path = self.path_from_contentid(row[3], row[5], oncard)
mime = mime_type_ext(path_to_ext(row[3]))
if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
# print "shortbook: " + path
elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
if changed:
need_sync = True
cursor.close()
connection.close()
# Remove books that are no longer in the filesystem. Cache contains
# indices into the booklist if book not in filesystem, None otherwise
# Do the operation in reverse order so indices remain valid
for idx in sorted(bl_cache.itervalues(), reverse=True):
if idx is not None:
need_sync = True
del bl[idx]
#print "count found in cache: %d, count of files in metadata: %d, need_sync: %s" % \
# (len(bl_cache), len(bl), need_sync)
if need_sync: #self.count_found_in_bl != len(bl) or need_sync:
if oncard == 'cardb':
self.sync_booklists((None, None, bl))
elif oncard == 'carda':
self.sync_booklists((None, bl, None))
else:
self.sync_booklists((bl, None, None))
self.report_progress(1.0, _('Getting list of books on device...'))
return bl
def delete_via_sql(self, ContentID, ContentType):
# Delete Order:
# 1) shortcover_page
# 2) volume_shortcovers
# 3) content
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
cursor = connection.cursor()
t = (ContentID,)
cursor.execute('select ImageID from content where ContentID = ?', t)
ImageID = None
for row in cursor:
# First get the ImageID to delete the images
ImageID = row[0]
cursor.close()
cursor = connection.cursor()
if ContentType == 6:
# Delete the shortcover_pages first
cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)
#Delete the volume_shortcovers second
cursor.execute('delete from volume_shortcovers where volumeid = ?', t)
# Delete the chapters associated with the book next
t = (ContentID,ContentID,)
cursor.execute('delete from content where BookID = ? or ContentID = ?', t)
connection.commit()
cursor.close()
if ImageID == None:
print "Error condition ImageID was not found"
print "You likely tried to delete a book that the kobo has not yet added to the database"
connection.close()
# If all this succeeds we need to delete the images files via the ImageID
return ImageID
def delete_images(self, ImageID):
if ImageID != None:
path_prefix = '.kobo/images/'
path = self._main_prefix + path_prefix + ImageID
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed',)
for ending in file_endings:
fpath = path + ending
fpath = self.normalize_path(fpath)
if os.path.exists(fpath):
# print 'Image File Exists: ' + fpath
os.unlink(fpath)
def delete_books(self, paths, end_session=True):
for i, path in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
path = self.normalize_path(path)
# print "Delete file normalized path: " + path
extension = os.path.splitext(path)[1]
if extension == '.kobo':
# Kobo books do not have book files. They do have some images though
#print "kobo book"
ContentType = 6
ContentID = self.contentid_from_path(path, ContentType)
elif extension == '.pdf' or extension == '.epub':
# print "ePub or pdf"
ContentType = 16
#print "Path: " + path
ContentID = self.contentid_from_path(path, ContentType)
# print "ContentID: " + ContentID
else: # if extension == '.html' or extension == '.txt':
ContentType = 999 # Yet another hack: to get around Kobo changing how ContentID is stored
ContentID = self.contentid_from_path(path, ContentType)
ImageID = self.delete_via_sql(ContentID, ContentType)
#print " We would now delete the Images for" + ImageID
self.delete_images(ImageID)
if os.path.exists(path):
# Delete the ebook
# print "Delete the ebook: " + path
os.unlink(path)
filepath = os.path.splitext(path)[0]
for ext in self.DELETE_EXTS:
if os.path.exists(filepath + ext):
# print "Filename: " + filename
os.unlink(filepath + ext)
if os.path.exists(path + ext):
# print "Filename: " + filename
os.unlink(path + ext)
if self.SUPPORTS_SUB_DIRS:
try:
# print "removed"
os.removedirs(os.path.dirname(path))
except:
pass
self.report_progress(1.0, _('Removing books from device...'))
def remove_books_from_metadata(self, paths, booklists):
for i, path in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...'))
for bl in booklists:
for book in bl:
#print "Book Path: " + book.path
if path.endswith(book.path):
#print " Remove: " + book.path
bl.remove_book(book)
self.report_progress(1.0, _('Removing books from device metadata listing...'))
def add_books_to_metadata(self, locations, metadata, booklists):
metadata = iter(metadata)
for i, location in enumerate(locations):
self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
info = metadata.next()
blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0
# Extract the correct prefix from the pathname. To do this correctly,
# we must ensure that both the prefix and the path are normalized
# so that the comparison will work. Book's __init__ will fix up
# lpath, so we don't need to worry about that here.
path = self.normalize_path(location[0])
if self._main_prefix:
prefix = self._main_prefix if \
path.startswith(self.normalize_path(self._main_prefix)) else None
if not prefix and self._card_a_prefix:
prefix = self._card_a_prefix if \
path.startswith(self.normalize_path(self._card_a_prefix)) else None
if not prefix and self._card_b_prefix:
prefix = self._card_b_prefix if \
path.startswith(self.normalize_path(self._card_b_prefix)) else None
if prefix is None:
prints('in add_books_to_metadata. Prefix is None!', path,
self._main_prefix)
continue
#print "Add book to metatdata: "
#print "prefix: " + prefix
lpath = path.partition(prefix)[2]
if lpath.startswith('/') or lpath.startswith('\\'):
lpath = lpath[1:]
#print "path: " + lpath
#book = self.book_class(prefix, lpath, other=info)
lpath = self.normalize_path(prefix + lpath)
book = Book(prefix, lpath, '', '', '', '', '', '', other=info)
if book.size is None:
book.size = os.stat(self.normalize_path(path)).st_size
booklists[blist].add_book(book, replace_metadata=True)
self.report_progress(1.0, _('Adding books to device metadata listing...'))
def contentid_from_path(self, path, ContentType):
if ContentType == 6:
ContentID = os.path.splitext(path)[0]
# Remove the prefix on the file. it could be either
ContentID = ContentID.replace(self._main_prefix, '')
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, '')
elif ContentType == 999: # HTML Files
ContentID = path
ContentID = ContentID.replace(self._main_prefix, "/mnt/onboard/")
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, "/mnt/sd/")
else: # ContentType = 16
ContentID = path
ContentID = ContentID.replace(self._main_prefix, "file:///mnt/onboard/")
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, "file:///mnt/sd/")
ContentID = ContentID.replace("\\", '/')
return ContentID
def path_from_contentid(self, ContentID, ContentType, oncard):
path = ContentID
if oncard == 'cardb':
print 'path from_contentid cardb'
elif oncard == 'carda':
path = path.replace("file:///mnt/sd/", self._card_a_prefix)
# print "SD Card: " + filename
else:
if ContentType == "6":
# This is a hack as the kobo files do not exist
# but the path is required to make a unique id
# for calibre's reference
path = self._main_prefix + path + '.kobo'
# print "Path: " + path
else:
# if path.startswith("file:///mnt/onboard/"):
path = path.replace("file:///mnt/onboard/", self._main_prefix)
path = path.replace("/mnt/onboard/", self._main_prefix)
# print "Internal: " + filename
return path
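A worked example of the ContentID/path round trip implemented by contentid_from_path() and path_from_contentid() above, for a side-loaded ePub (ContentType 16) in main memory; the mount point is illustrative:
#   self._main_prefix                     -> '/media/KOBOeReader/'
#   path on disk                          -> '/media/KOBOeReader/eBooks/Author/Title.epub'
#   contentid_from_path(path, 16)         -> 'file:///mnt/onboard/eBooks/Author/Title.epub'
#   path_from_contentid(ContentID, 16, None) applies the reverse replacement, recovering
#   the on-disk path, which is how books() resolves rows read from KoboReader.sqlite.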
View File
@ -49,3 +49,41 @@ class AVANT(USBMS):
EBOOK_DIR_MAIN = '' EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True SUPPORTS_SUB_DIRS = True
class SWEEX(USBMS):
name = 'Sweex Device Interface'
gui_name = 'Sweex'
description = _('Communicate with the Sweex MM300')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
# Ordered list of supported formats
FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]
BCD = [0x0319]
VENDOR_NAME = 'SWEEX'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True
class PDNOVEL(USBMS):
name = 'Pandigital Novel device interface'
gui_name = 'PD Novel'
description = _('Communicate with the Pandigital Novel')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'linux', 'osx']
FORMATS = ['epub', 'pdf']
VENDOR_ID = [0x18d1]
PRODUCT_ID = [0xb004]
BCD = [0x224]
VENDOR_NAME = 'ANDROID'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '__UMS_COMPOSITE'
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = False
View File
@ -67,3 +67,24 @@ class E71X(USBMS):
VENDOR_NAME = 'NOKIA' VENDOR_NAME = 'NOKIA'
WINDOWS_MAIN_MEM = 'S60' WINDOWS_MAIN_MEM = 'S60'
class E52(USBMS):
name = 'Nokia E52 device interface'
gui_name = 'Nokia E52'
description = _('Communicate with the Nokia E52')
author = 'David Ignjic'
supported_platforms = ['windows', 'linux', 'osx']
VENDOR_ID = [0x421]
PRODUCT_ID = [0x1CD]
BCD = [0x100]
FORMATS = ['mobi', 'prc']
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = True
VENDOR_NAME = 'NOKIA'
WINDOWS_MAIN_MEM = 'S60'
View File
@ -99,7 +99,7 @@ class PRS505(USBMS):
if self._card_b_prefix is not None: if self._card_b_prefix is not None:
if not write_cache(self._card_b_prefix): if not write_cache(self._card_b_prefix):
self._card_b_prefix = None self._card_b_prefix = None
self.booklist_class.rebuild_collections = self.rebuild_collections
def get_device_information(self, end_session=True): def get_device_information(self, end_session=True):
return (self.gui_name, '', '', '') return (self.gui_name, '', '', '')
@ -145,7 +145,7 @@ class PRS505(USBMS):
blists[i] = booklists[i] blists[i] = booklists[i]
opts = self.settings() opts = self.settings()
if opts.extra_customization: if opts.extra_customization:
collections = [x.strip() for x in collections = [x.lower().strip() for x in
opts.extra_customization.split(',')] opts.extra_customization.split(',')]
else: else:
collections = [] collections = []
@ -156,4 +156,10 @@ class PRS505(USBMS):
USBMS.sync_booklists(self, booklists, end_session=end_session) USBMS.sync_booklists(self, booklists, end_session=end_session)
debug_print('PRS505: finished sync_booklists') debug_print('PRS505: finished sync_booklists')
def rebuild_collections(self, booklist, oncard):
debug_print('PRS505: started rebuild_collections on card', oncard)
c = self.initialize_XML_cache()
c.rebuild_collections(booklist, {'carda':1, 'cardb':2}.get(oncard, 0))
c.write()
debug_print('PRS505: finished rebuild_collections')
View File
@@ -6,10 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, time
-from pprint import pprint
 from base64 import b64decode
 from uuid import uuid4
 from lxml import etree

 from calibre import prints, guess_type
@@ -62,8 +60,7 @@ class XMLCache(object):
     def __init__(self, paths, prefixes, use_author_sort):
         if DEBUG:
-            debug_print('Building XMLCache...')
-            pprint(paths)
+            debug_print('Building XMLCache...', paths)
         self.paths = paths
         self.prefixes = prefixes
         self.use_author_sort = use_author_sort
@@ -147,39 +144,73 @@ class XMLCache(object):
                     if title+str(i) not in seen:
                         title = title+str(i)
                         playlist.set('title', title)
+                        seen.add(title)
                         break
                 else:
                     seen.add(title)

-    def get_playlist_map(self):
-        debug_print('Start get_playlist_map')
-        ans = {}
-        self.ensure_unique_playlist_titles()
-        self.prune_empty_playlists()
-        debug_print('get_playlist_map loop')
-        for i, root in self.record_roots.items():
-            debug_print('get_playlist_map loop', i)
-            id_map = self.build_id_map(root)
-            ans[i] = []
-            for playlist in root.xpath('//*[local-name()="playlist"]'):
-                items = []
-                for item in playlist:
-                    id_ = item.get('id', None)
-                    record = id_map.get(id_, None)
-                    if record is not None:
-                        items.append(record)
-                ans[i].append((playlist.get('title'), items))
-        debug_print('end get_playlist_map')
-        return ans
+    def build_id_playlist_map(self, bl_index):
+        '''
+        Return a map of the collections in books: {lpaths: [collection names]}
+        '''
+        debug_print('Start build_id_playlist_map')
+        self.ensure_unique_playlist_titles()
+        debug_print('after ensure_unique_playlist_titles')
+        self.prune_empty_playlists()
+        debug_print('after cleaning playlists')
+        root = self.record_roots[bl_index]
+        if root is None:
+            return
+        id_map = self.build_id_map(root)
+        playlist_map = {}
+        # foreach playlist, get the lpaths for the ids in it, then add to dict
+        for playlist in root.xpath('//*[local-name()="playlist"]'):
+            name = playlist.get('title')
+            if name is None:
+                debug_print('build_id_playlist_map: unnamed playlist!')
+                continue
+            for item in playlist:
+                # translate each id into its lpath
+                id_ = item.get('id', None)
+                if id_ is None:
+                    debug_print('build_id_playlist_map: id_ is None!')
+                    continue
+                bk = id_map.get(id_, None)
+                if bk is None:
+                    debug_print('build_id_playlist_map: book is None!', id_)
+                    continue
+                lpath = bk.get('path', None)
+                if lpath is None:
+                    debug_print('build_id_playlist_map: lpath is None!', id_)
+                    continue
+                if lpath not in playlist_map:
+                    playlist_map[lpath] = []
+                playlist_map[lpath].append(name)
+        debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
+        return playlist_map
+
+    def reset_existing_playlists_map(self):
+        '''
+        Call this method before calling get_or_create_playlist in the context of
+        a given job. Call it again after deleting any playlists. The current
+        implementation adds all new playlists before deleting any, so that
+        constraint is respected.
+        '''
+        self._playlist_to_playlist_id_map = {}

     def get_or_create_playlist(self, bl_idx, title):
+        # maintain a private map of playlists to their ids. Don't check if it
+        # exists, because reset_existing_playlist_map must be called before it
+        # is used to ensure that deleted playlists are taken into account
         root = self.record_roots[bl_idx]
-        for playlist in root.xpath('//*[local-name()="playlist"]'):
-            if playlist.get('title', None) == title:
-                return playlist
-        if DEBUG:
-            debug_print('Creating playlist:', title)
+        if bl_idx not in self._playlist_to_playlist_id_map:
+            self._playlist_to_playlist_id_map[bl_idx] = {}
+            for playlist in root.xpath('//*[local-name()="playlist"]'):
+                pl_title = playlist.get('title', None)
+                if pl_title is not None:
+                    self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
+        if title in self._playlist_to_playlist_id_map[bl_idx]:
+            return self._playlist_to_playlist_id_map[bl_idx][title]
+        debug_print('Creating playlist:', title)
         ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
                 nsmap=root.nsmap, attrib={
                     'uuid' : uuid(),
@@ -188,12 +219,12 @@ class XMLCache(object):
                     'sourceid': '1'
                     })
         root.append(ans)
+        self._playlist_to_playlist_id_map[bl_idx][title] = ans
         return ans
     # }}}

     def fix_ids(self): # {{{
-        if DEBUG:
-            debug_print('Running fix_ids()')
+        debug_print('Running fix_ids()')

         def ensure_numeric_ids(root):
             idmap = {}
@@ -251,7 +282,9 @@ class XMLCache(object):
             ensure_media_xml_base_ids(root)

             idmap = ensure_numeric_ids(root)
-            remap_playlist_references(root, idmap)
+            if len(idmap) > 0:
+                debug_print('fix_ids: found some non-numeric ids')
+                remap_playlist_references(root, idmap)
             if i == 0:
                 sourceid, playlist_sid = 1, 0
                 base = 0
@@ -276,38 +309,19 @@ class XMLCache(object):
     def update_booklist(self, bl, bl_index):
         if bl_index not in self.record_roots:
             return
-        if DEBUG:
-            debug_print('Updating JSON cache:', bl_index)
+        debug_print('Updating JSON cache:', bl_index)
+        playlist_map = self.build_id_playlist_map(bl_index)
         root = self.record_roots[bl_index]
-        pmap = self.get_playlist_map()[bl_index]
-        playlist_map = {}
-        for title, records in pmap:
-            for record in records:
-                path = record.get('path', None)
-                if path:
-                    if path not in playlist_map:
-                        playlist_map[path] = []
-                    playlist_map[path].append(title)
         lpath_map = self.build_lpath_map(root)
         for book in bl:
             record = lpath_map.get(book.lpath, None)
             if record is not None:
                 title = record.get('title', None)
                 if title is not None and title != book.title:
-                    if DEBUG:
-                        debug_print('Renaming title', book.title, 'to', title)
+                    debug_print('Renaming title', book.title, 'to', title)
                     book.title = title
-                # We shouldn't do this for Sonys, because the reader strips
-                # all but the first author.
-                # authors = record.get('author', None)
-                # if authors is not None:
-                #     authors = string_to_authors(authors)
-                #     if authors != book.authors:
-                #         if DEBUG:
-                #             prints('Renaming authors', book.authors, 'to',
-                #                     authors)
-                #         book.authors = authors
+                # Don't set the author, because the reader strips all but
+                # the first author.
                 for thumbnail in record.xpath(
                         'descendant::*[local-name()="thumbnail"]'):
                     for img in thumbnail.xpath(
@@ -318,47 +332,57 @@ class XMLCache(object):
                             book.thumbnail = raw
                             break
                     break
-                if book.lpath in playlist_map:
-                    tags = playlist_map[book.lpath]
-                    book.device_collections = tags
+                book.device_collections = playlist_map.get(book.lpath, [])
         debug_print('Finished updating JSON cache:', bl_index)
     # }}}

     # Update XML from JSON {{{
     def update(self, booklists, collections_attributes):
-        debug_print('Starting update XML from JSON')
-        playlist_map = self.get_playlist_map()
+        debug_print('Starting update', collections_attributes)
         for i, booklist in booklists.items():
-            if DEBUG:
-                debug_print('Updating XML Cache:', i)
+            playlist_map = self.build_id_playlist_map(i)
+            debug_print('Updating XML Cache:', i)
             root = self.record_roots[i]
             lpath_map = self.build_lpath_map(root)
+            gtz_count = ltz_count = 0
             for book in booklist:
                 path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
-                # record = self.book_by_lpath(book.lpath, root)
                 record = lpath_map.get(book.lpath, None)
                 if record is None:
                     record = self.create_text_record(root, i, book.lpath)
-                self.update_text_record(record, book, path, i)
-
-            bl_pmap = playlist_map[i]
-            self.update_playlists(i, root, booklist, bl_pmap,
-                    collections_attributes)
+                (gtz_count, ltz_count) = self.update_text_record(record, book,
+                                            path, i, gtz_count, ltz_count)
+                # Ensure the collections in the XML database are recorded for
+                # this book
+                if book.device_collections is None:
+                    book.device_collections = []
+                book.device_collections = playlist_map.get(book.lpath, [])
+            debug_print('Timezone votes: %d GMT, %d LTZ'%(gtz_count, ltz_count))
+            self.update_playlists(i, root, booklist, collections_attributes)
+        # Update the device collections because update playlist could have added
+        # some new ones.
+        debug_print('In update/ Starting refresh of device_collections')
+        for i, booklist in booklists.items():
+            playlist_map = self.build_id_playlist_map(i)
+            for book in booklist:
+                book.device_collections = playlist_map.get(book.lpath, [])
+        self.fix_ids()
+        debug_print('Finished update')
+
+    def rebuild_collections(self, booklist, bl_index):
+        if bl_index not in self.record_roots:
+            return
+        root = self.record_roots[bl_index]
+        self.update_playlists(bl_index, root, booklist, [])
         self.fix_ids()

-        # This is needed to update device_collections
-        for i, booklist in booklists.items():
-            self.update_booklist(booklist, i)
-
-        debug_print('Finished update XML from JSON')
-
-    def update_playlists(self, bl_index, root, booklist, playlist_map,
-            collections_attributes):
-        debug_print('Starting update_playlists')
+    def update_playlists(self, bl_index, root, booklist, collections_attributes):
+        debug_print('Starting update_playlists', collections_attributes, bl_index)
+        self.reset_existing_playlists_map()
         collections = booklist.get_collections(collections_attributes)
         lpath_map = self.build_lpath_map(root)
+        debug_print('update_playlists: finished building maps')
         for category, books in collections.items():
             records = [lpath_map.get(b.lpath, None) for b in books]
             # Remove any books that were not found, although this
@@ -367,25 +391,34 @@ class XMLCache(object):
                 debug_print('WARNING: Some elements in the JSON cache were not'
                         ' found in the XML cache')
             records = [x for x in records if x is not None]
+            # Ensure each book has an ID.
             for rec in records:
                 if rec.get('id', None) is None:
                     rec.set('id', str(self.max_id(root)+1))
             ids = [x.get('id', None) for x in records]
+            # Given that we set the ids, there shouldn't be any None's. But
+            # better to be safe...
             if None in ids:
-                if DEBUG:
-                    debug_print('WARNING: Some <text> elements do not have ids')
-                    ids = [x for x in ids if x is not None]
+                debug_print('WARNING: Some <text> elements do not have ids')
+                ids = [x for x in ids if x is not None]

             playlist = self.get_or_create_playlist(bl_index, category)
+            # Get the books currently in the playlist. We will need them to be
+            # sure to put back any books that were manually added.
             playlist_ids = []
             for item in playlist:
                 id_ = item.get('id', None)
                 if id_ is not None:
                     playlist_ids.append(id_)
+            # Empty the playlist. We do this so that the playlist will have the
+            # order specified by get_collections
             for item in list(playlist):
                 playlist.remove(item)
+
+            # Get a list of ids not known by get_collections
             extra_ids = [x for x in playlist_ids if x not in ids]
+            # Rebuild the collection in the order specified by get_collections. Then
+            # add the ids that get_collections didn't know about.
             for id_ in ids + extra_ids:
                 item = playlist.makeelement(
                         '{%s}item'%self.namespaces[bl_index],
@@ -423,11 +456,38 @@ class XMLCache(object):
         root.append(ans)
         return ans

-    def update_text_record(self, record, book, path, bl_index):
+    def update_text_record(self, record, book, path, bl_index, gtz_count, ltz_count):
+        '''
+        Update the Sony database from the book. This is done if the timestamp in
+        the db differs from the timestamp on the file.
+        '''
+
+        # It seems that a Sony device can sometimes know what timezone it is in,
+        # and apparently converts the dates to GMT when it writes them to the
+        # db. Unfortunately, we can't tell when it does this, so we use a
+        # horrible heuristic. First, set dates only for new books, trying to
+        # avoid upsetting the sony. Use the timezone determined through the
+        # voting described next. Second, voting: if a book is not new, compare
+        # its Sony DB date against localtime and gmtime. Count the matches. When
+        # we must set a date, use the one with the most matches. Use localtime
+        # if the case of a tie, and hope it is right.
         timestamp = os.path.getmtime(path)
-        date = strftime(timestamp)
-        if date != record.get('date', None):
+        rec_date = record.get('date', None)
+        if not getattr(book, '_new_book', False): # book is not new
+            if strftime(timestamp, zone=time.gmtime) == rec_date:
+                gtz_count += 1
+            elif strftime(timestamp, zone=time.localtime) == rec_date:
+                ltz_count += 1
+        else: # book is new. Set the time using the current votes
+            if ltz_count >= gtz_count:
+                tz = time.localtime
+                debug_print("Using localtime TZ for new book", book.lpath)
+            else:
+                tz = time.gmtime
+                debug_print("Using GMT TZ for new book", book.lpath)
+            date = strftime(timestamp, zone=tz)
             record.set('date', date)
         record.set('size', str(os.stat(path).st_size))
         title = book.title if book.title else _('Unknown')
         record.set('title', title)
@@ -452,6 +512,7 @@ class XMLCache(object):
         if 'id' not in record.attrib:
             num = self.max_id(record.getroottree().getroot())
             record.set('id', str(num+1))
+        return (gtz_count, ltz_count)
     # }}}

     # Writing the XML files {{{
@@ -544,10 +605,5 @@ class XMLCache(object):
                     break
             self.namespaces[i] = ns

-        # if DEBUG:
-        #     debug_print('Found nsmaps:')
-        #     pprint(self.nsmaps)
-        #     debug_print('Found namespaces:')
-        #     pprint(self.namespaces)
     # }}}
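The timezone handling in update_text_record above amounts to a small voting scheme. A minimal standalone sketch of that heuristic, using only the standard library (the date format and function names here are illustrative, not calibre's):

    import time

    DATE_FMT = '%a, %d %b %Y %H:%M:%S'   # assumed format, for illustration only

    def record_timezone_vote(file_mtime, db_date, gtz_count, ltz_count):
        # An existing book votes for whichever timezone reproduces the date
        # already stored in the Sony database.
        if time.strftime(DATE_FMT, time.gmtime(file_mtime)) == db_date:
            gtz_count += 1
        elif time.strftime(DATE_FMT, time.localtime(file_mtime)) == db_date:
            ltz_count += 1
        return gtz_count, ltz_count

    def date_for_new_book(file_mtime, gtz_count, ltz_count):
        # A new book is dated using the timezone with the most votes;
        # ties fall back to local time.
        zone = time.localtime if ltz_count >= gtz_count else time.gmtime
        return time.strftime(DATE_FMT, zone(file_mtime))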

@@ -11,10 +11,11 @@ from calibre.devices.mime import mime_type_ext
 from calibre.devices.interface import BookList as _BookList
 from calibre.constants import filesystem_encoding, preferred_encoding
 from calibre import isbytestring
+from calibre.utils.config import prefs

 class Book(MetaInformation):

-    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections']
+    BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']

     JSON_ATTRS = [
         'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
@@ -29,6 +30,7 @@ class Book(MetaInformation):

         MetaInformation.__init__(self, '')

+        self._new_book = False
         self.device_collections = []
         self.path = os.path.join(prefix, lpath)
         if os.sep == '\\':
@@ -76,7 +78,7 @@ class Book(MetaInformation):
        in C{other} takes precedence, unless the information in C{other} is NULL.
        '''
-        MetaInformation.smart_update(self, other)
+        MetaInformation.smart_update(self, other, replace_tags=True)

         for attr in self.BOOK_ATTRS:
             if hasattr(other, attr):
@@ -132,10 +134,28 @@ class CollectionsBookList(BookList):

     def get_collections(self, collection_attributes):
         collections = {}
         series_categories = set([])
-        collection_attributes = list(collection_attributes)+['device_collections']
-        for attr in collection_attributes:
-            attr = attr.strip()
-            for book in self:
+        # This map of sets is used to avoid linear searches when testing for
+        # book equality
+        collections_lpaths = {}
+        for book in self:
+            # Make sure we can identify this book via the lpath
+            lpath = getattr(book, 'lpath', None)
+            if lpath is None:
+                continue
+            # Decide how we will build the collections. The default: leave the
+            # book in all existing collections. Do not add any new ones.
+            attrs = ['device_collections']
+            if getattr(book, '_new_book', False):
+                if prefs['preserve_user_collections']:
+                    # Ensure that the book is in all the book's existing
+                    # collections plus all metadata collections
+                    attrs += collection_attributes
+                else:
+                    # The book's existing collections are ignored. Put the book
+                    # in collections defined by its metadata.
+                    attrs = collection_attributes
+            for attr in attrs:
+                attr = attr.strip()
                 val = getattr(book, attr, None)
                 if not val: continue
                 if isbytestring(val):
@@ -150,11 +170,12 @@ class CollectionsBookList(BookList):
                     continue
                 if category not in collections:
                     collections[category] = []
-                if book not in collections[category]:
+                    collections_lpaths[category] = set()
+                if lpath not in collections_lpaths[category]:
+                    collections_lpaths[category].add(lpath)
                     collections[category].append(book)
                 if attr == 'series':
                     series_categories.add(category)
         # Sort collections
         for category, books in collections.items():
             def tgetter(x):
@@ -167,3 +188,15 @@ class CollectionsBookList(BookList):
             books.sort(cmp=lambda x,y:cmp(getter(x), getter(y)))

         return collections
+
+    def rebuild_collections(self, booklist, oncard):
+        '''
+        For each book in the booklist for the card oncard, remove it from all
+        its current collections, then add it to the collections specified in
+        device_collections.
+
+        oncard is None for the main memory, carda for card A, cardb for card B,
+        etc.
+
+        booklist is the object created by the :method:`books` call above.
+        '''
+        pass
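A condensed sketch of the per-book rule get_collections applies above (illustrative only; preserve_user_collections stands in for the calibre preference read through prefs):

    def attrs_for_book(book, collection_attributes, preserve_user_collections=True):
        # Default: keep the book in the collections it already belongs to on
        # the device and do not add any new ones.
        attrs = ['device_collections']
        if getattr(book, '_new_book', False):
            if preserve_user_collections:
                # New book: existing device collections plus metadata-driven ones.
                attrs = attrs + list(collection_attributes)
            else:
                # New book: metadata-driven collections only.
                attrs = list(collection_attributes)
        return attrs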

@@ -78,9 +78,6 @@ class Device(DeviceConfig, DevicePlugin):
     STORAGE_CARD_VOLUME_LABEL = ''
     STORAGE_CARD2_VOLUME_LABEL = None

-    SUPPORTS_SUB_DIRS = False
-    MUST_READ_METADATA = False
-    SUPPORTS_USE_AUTHOR_SORT = False

     EBOOK_DIR_MAIN = ''
     EBOOK_DIR_CARD_A = ''

@@ -13,6 +13,10 @@ class DeviceConfig(object):
     EXTRA_CUSTOMIZATION_MESSAGE = None
     EXTRA_CUSTOMIZATION_DEFAULT = None

+    SUPPORTS_SUB_DIRS = False
+    MUST_READ_METADATA = False
+    SUPPORTS_USE_AUTHOR_SORT = False
+
     #: If None the default is used
     SAVE_TEMPLATE = None

@@ -23,9 +27,14 @@ class DeviceConfig(object):
             config().parse().send_template

     @classmethod
-    def _config(cls):
+    def _config_base_name(cls):
         klass = cls if isinstance(cls, type) else cls.__class__
-        c = Config('device_drivers_%s' % klass.__name__, _('settings for device drivers'))
+        return klass.__name__
+
+    @classmethod
+    def _config(cls):
+        name = cls._config_base_name()
+        c = Config('device_drivers_%s' % name, _('settings for device drivers'))
         c.add_opt('format_map', default=cls.FORMATS,
             help=_('Ordered list of formats the device will accept'))
         c.add_opt('use_subdirs', default=True,

@@ -233,6 +233,7 @@ class USBMS(CLI, Device):
                 book = self.book_class(prefix, lpath, other=info)
                 if book.size is None:
                     book.size = os.stat(self.normalize_path(path)).st_size
+                book._new_book = True # Must be before add_book
                 booklists[blist].add_book(book, replace_metadata=True)
         self.report_progress(1.0, _('Adding books to device metadata listing...'))
         debug_print('USBMS: finished adding metadata')
@@ -273,6 +274,9 @@ class USBMS(CLI, Device):
         self.report_progress(1.0, _('Removing books from device metadata listing...'))
         debug_print('USBMS: finished removing metadata for %d books'%(len(paths)))

+    # If you override this method and you use book._new_book, then you must
+    # complete the processing before you call this method. The flag is cleared
+    # at the end just before the return
     def sync_booklists(self, booklists, end_session=True):
         debug_print('USBMS: starting sync_booklists')

@@ -286,11 +290,18 @@ class USBMS(CLI, Device):
                 js = [item.to_json() for item in booklists[listid] if
                       hasattr(item, 'to_json')]
                 with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:
-                    json.dump(js, f, indent=2, encoding='utf-8')
+                    f.write(json.dumps(js, indent=2, encoding='utf-8'))

         write_prefix(self._main_prefix, 0)
         write_prefix(self._card_a_prefix, 1)
         write_prefix(self._card_b_prefix, 2)

+        # Clear the _new_book indication, as we are supposed to be done with
+        # adding books at this point
+        for blist in booklists:
+            if blist is not None:
+                for book in blist:
+                    book._new_book = False
+
         self.report_progress(1.0, _('Sending metadata to device...'))
         debug_print('USBMS: finished sync_booklists')

@@ -49,7 +49,6 @@ class CHMInput(InputFormatPlugin):
             log.debug('stream.name=%s' % stream.name)
             mainname = self._chmtohtml(tdir, chm_name, no_images, log)
             mainpath = os.path.join(tdir, mainname)
-            #raw_input()

             metadata = get_metadata_from_reader(self._chm_reader)
@@ -92,7 +91,7 @@ class CHMInput(InputFormatPlugin):
             metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
         if not metadata.language:
             oeb.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
+            metadata.add('language', get_lang().replace('_', '-'))
         if not metadata.creator:
             oeb.logger.warn('Creator not specified')
             metadata.add('creator', _('Unknown'))
@@ -141,10 +140,9 @@ class CHMInput(InputFormatPlugin):
         log.debug('Found %d section nodes' % len(chapters))
         htmlpath = os.path.splitext(hhcpath)[0] + ".html"
         f = open(htmlpath, 'wb')
-        f.write('<html><head><meta http-equiv="Content-type"'
-                ' content="text/html;charset=UTF-8" /></head><body>\n')
         if chapters:
+            f.write('<html><head><meta http-equiv="Content-type"'
+                    ' content="text/html;charset=UTF-8" /></head><body>\n')
             path0 = chapters[0][1]
             subpath = os.path.dirname(path0)
@@ -158,7 +156,9 @@ class CHMInput(InputFormatPlugin):
                 url = url.encode('utf-8')
                 f.write(url)
             f.write("</body></html>")
+        else:
+            f.write(hhcdata)
         f.close()
         return htmlpath

@@ -8,7 +8,7 @@ import os, re
 from mimetypes import guess_type as guess_mimetype
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
-from calibre.constants import iswindows
+from calibre.constants import iswindows, filesystem_encoding
 from calibre.utils.chm.chm import CHMFile
 from calibre.utils.chm.chmlib import (
   CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -78,6 +78,8 @@ class CHMError(Exception):
 class CHMReader(CHMFile):
     def __init__(self, input, log):
         CHMFile.__init__(self)
+        if isinstance(input, unicode):
+            input = input.encode(filesystem_encoding)
         if not self.LoadCHM(input):
             raise CHMError("Unable to open CHM file '%s'"%(input,))
         self.log = log
@@ -91,7 +93,6 @@ class CHMReader(CHMFile):
         self.root, ext = os.path.splitext(self.topics.lstrip('/'))
         self.hhc_path = self.root + ".hhc"

     def _parse_toc(self, ul, basedir=os.getcwdu()):
         toc = TOC(play_order=self._playorder, base_path=basedir, text='')
         self._playorder += 1
@@ -152,6 +153,8 @@ class CHMReader(CHMFile):
             if f.lower() == self.hhc_path.lower():
                 self.hhc_path = f
                 break
+        if self.hhc_path not in files and files:
+            self.hhc_path = files[0]

     def _reformat(self, data):
         try:
@@ -159,7 +162,7 @@ class CHMReader(CHMFile):
             soup = BeautifulSoup(data)
         except ValueError:
             # hit some strange encoding problems...
-            print "Unable to parse html for cleaning, leaving it :("
+            self.log.exception("Unable to parse html for cleaning, leaving it")
             return data
         # nuke javascript...
         [s.extract() for s in soup('script')]

@@ -151,6 +151,7 @@ cpalmdoc_do_compress(buffer *b, char *output) {
                 for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j];
             }
     }
+    PyMem_Free(temp.data);
     return output - head;
 }

@@ -168,7 +169,9 @@ cpalmdoc_compress(PyObject *self, PyObject *args) {
     for (j = 0; j < input_len; j++)
         b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
     b.len = input_len;
-    output = (char *)PyMem_Malloc(sizeof(char) * b.len);
+    // Make the output buffer larger than the input as sometimes
+    // compression results in a larger block
+    output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len));
     if (output == NULL) return PyErr_NoMemory();
     j = cpalmdoc_do_compress(&b, output);
     if ( j == 0) return PyErr_NoMemory();

@@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)

 LIGATURES = {
-        u'\u00c6': u'AE',
-        u'\u00e6': u'ae',
-        u'\u0152': u'OE',
-        u'\u0153': u'oe',
-        u'\u0132': u'IJ',
-        u'\u0133': u'ij',
-        u'\u1D6B': u'ue',
+        # u'\u00c6': u'AE',
+        # u'\u00e6': u'ae',
+        # u'\u0152': u'OE',
+        # u'\u0153': u'oe',
+        # u'\u0132': u'IJ',
+        # u'\u0133': u'ij',
+        # u'\u1D6B': u'ue',
         u'\uFB00': u'ff',
         u'\uFB01': u'fi',
         u'\uFB02': u'fl',
@@ -107,9 +107,21 @@ class CSSPreProcessor(object):

     PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')

-    def __call__(self, data):
+    def __call__(self, data, add_namespace=False):
+        from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
         data = self.PAGE_PAT.sub('', data)
-        return data
+        if not add_namespace:
+            return data
+        ans, namespaced = [], False
+        for line in data.splitlines():
+            ll = line.lstrip()
+            if not (namespaced or ll.startswith('@import') or
+                        ll.startswith('@charset')):
+                ans.append(XHTML_CSS_NAMESPACE.strip())
+                namespaced = True
+            ans.append(line)
+        return u'\n'.join(ans)

 class HTMLPreProcessor(object):

@@ -268,7 +280,7 @@ class HTMLPreProcessor(object):
         if getattr(self.extra_opts, 'remove_footer', None):
             try:
-                rules.insert(0
+                rules.insert(0,
                     (re.compile(self.extra_opts.footer_regex), lambda match : '')
                 )
             except:
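The add_namespace branch added to CSSPreProcessor.__call__ above inserts a default namespace rule before the first statement that is not an @import or @charset. A self-contained approximation (the namespace string is assumed; calibre takes its value from XHTML_CSS_NAMESPACE):

    XHTML_CSS_NAMESPACE = '@namespace "http://www.w3.org/1999/xhtml";'  # assumed value

    def add_default_namespace(css):
        # Insert the namespace rule before the first line that is neither an
        # @import nor an @charset, leaving everything else untouched.
        out, done = [], False
        for line in css.splitlines():
            ll = line.lstrip()
            if not (done or ll.startswith('@import') or ll.startswith('@charset')):
                out.append(XHTML_CSS_NAMESPACE)
                done = True
            out.append(line)
        return '\n'.join(out)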

@@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize import Plugin
class InvalidEpub(ValueError):
pass
class ePubFixer(Plugin):
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal'
type = _('ePub Fixer')
can_be_disabled = True
# API that subclasses must implement {{{
@property
def short_description(self):
raise NotImplementedError
@property
def long_description(self):
raise NotImplementedError
@property
def fix_name(self):
raise NotImplementedError
@property
def options(self):
'''
Return a list of 4-tuples
(option_name, type, default, help_text)
type is one of 'bool', 'int', 'string'
'''
return []
def run(self, container, opts, log, fix=False):
raise NotImplementedError
# }}}
def add_options_to_parser(self, parser):
parser.add_option('--' + self.fix_name.replace('_', '-'),
help=self.long_description, action='store_true', default=False)
for option in self.options:
action = 'store'
if option[1] == 'bool':
action = 'store_true'
kwargs = {'action': action, 'default':option[2], 'help':option[3]}
if option[1] != 'bool':
kwargs['type'] = option[1]
parser.add_option('--'+option[0].replace('_', '-'), **kwargs)
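A minimal subclass sketch (hypothetical, for illustration only) showing how the plugin API above fits together; add_options_to_parser turns fix_name into a --dummy switch and each options tuple into its own switch:

    class DummyFixer(ePubFixer):

        name = 'Dummy fixer'

        @property
        def short_description(self):
            return 'Do nothing (example only)'

        @property
        def long_description(self):
            return 'A do-nothing fixer that only illustrates the plugin API.'

        @property
        def fix_name(self):
            return 'dummy'

        @property
        def options(self):
            # (option_name, type, default, help_text) -> a --verbose-dummy switch
            return [('verbose_dummy', 'bool', False, 'Print extra information')]

        def run(self, container, opts, log, fix=False):
            log('dummy fixer ran, fix=%r' % fix)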

@@ -0,0 +1,182 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, posixpath, urllib, sys
from lxml import etree
from calibre.ebooks.epub.fix import InvalidEpub
from calibre import guess_type, prepare_string_for_xml
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import iswindows
from calibre.utils.zipfile import ZipFile, ZIP_STORED
exists, join = os.path.exists, os.path.join
OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'
class Container(object):
META_INF = {
'container.xml' : True,
'manifest.xml' : False,
'encryption.xml' : False,
'metadata.xml' : False,
'signatures.xml' : False,
'rights.xml' : False,
}
def __init__(self, path, log):
self.root = os.path.abspath(path)
self.log = log
self.dirtied = set([])
self.cache = {}
self.mime_map = {}
if exists(join(self.root, 'mimetype')):
os.remove(join(self.root, 'mimetype'))
container_path = join(self.root, 'META-INF', 'container.xml')
if not exists(container_path):
raise InvalidEpub('No META-INF/container.xml in epub')
self.container = etree.fromstring(open(container_path, 'rb').read())
opf_files = self.container.xpath((
r'child::ocf:rootfiles/ocf:rootfile'
'[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
), namespaces={'ocf':OCF_NS}
)
if not opf_files:
raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
opf_path = os.path.join(self.root,
*opf_files[0].get('full-path').split('/'))
if not exists(opf_path):
raise InvalidEpub('OPF file does not exist at location pointed to'
' by META-INF/container.xml')
# Map of relative paths with / separators to absolute
# paths on filesystem with os separators
self.name_map = {}
for dirpath, dirnames, filenames in os.walk(self.root):
for f in filenames:
path = join(dirpath, f)
name = os.path.relpath(path, self.root).replace(os.sep, '/')
self.name_map[name] = path
if path == opf_path:
self.opf_name = name
self.mime_map[name] = guess_type('a.opf')[0]
for item in self.opf.xpath(
'//opf:manifest/opf:item[@href and @media-type]',
namespaces={'opf':OPF_NS}):
href = item.get('href')
self.mime_map[self.href_to_name(href,
posixpath.dirname(self.opf_name))] = item.get('media-type')
def manifest_worthy_names(self):
for name in self.name_map:
if name.endswith('.opf'): continue
if name.startswith('META-INF') and \
posixpath.basename(name) in self.META_INF: continue
yield name
def delete_name(self, name):
self.mime_map.pop(name, None)
path = self.name_map[name]
os.remove(path)
self.name_map.pop(name)
def manifest_item_for_name(self, name):
href = self.name_to_href(name,
posixpath.dirname(self.opf_name))
q = prepare_string_for_xml(href, attribute=True)
existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
namespaces={'opf':OPF_NS})
if not existing:
return None
return existing[0]
def add_name_to_manifest(self, name):
item = self.manifest_item_for_name(name)
if item is not None:
return
manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
id=self.generate_manifest_id())
mt = guess_type(posixpath.basename(name))[0]
if not mt:
mt = 'application/octet-stream'
item.set('media-type', mt)
manifest.append(item)
def generate_manifest_id(self):
items = self.opf.xpath('//opf:manifest/opf:item[@id]',
namespaces={'opf':OPF_NS})
ids = set([x.get('id') for x in items])
for x in xrange(sys.maxint):
c = 'id%d'%x
if c not in ids:
return c
@property
def opf(self):
return self.get(self.opf_name)
def href_to_name(self, href, base=''):
href = urllib.unquote(href.partition('#')[0])
name = href
if base:
name = posixpath.join(base, href)
return name
def name_to_href(self, name, base):
if not base:
return name
return posixpath.relpath(name, base)
def get_raw(self, name):
path = self.name_map[name]
return open(path, 'rb').read()
def get(self, name):
if name in self.cache:
return self.cache[name]
raw = self.get_raw(name)
if name in self.mime_map:
raw = self._parse(raw, self.mime_map[name])
self.cache[name] = raw
return raw
def set(self, name, val):
self.cache[name] = val
self.dirtied.add(name)
def _parse(self, raw, mimetype):
mt = mimetype.lower()
if mt.endswith('+xml'):
parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
return etree.fromstring(xml_to_unicode(raw,
strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
return raw
def write(self, path):
for name in self.dirtied:
data = self.cache[name]
raw = data
if hasattr(data, 'xpath'):
raw = etree.tostring(data, encoding='utf-8',
xml_declaration=True)
with open(self.name_map[name], 'wb') as f:
f.write(raw)
self.dirtied.clear()
zf = ZipFile(path, 'w')
zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
compression=ZIP_STORED)
zf.add_dir(self.root)
zf.close()
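The name/href helpers above are thin wrappers over posixpath, with names keyed relative to the epub root. A rough usage sketch (paths hypothetical):

    import posixpath

    opf_dir = 'OEBPS'                     # hypothetical directory holding the OPF
    href = '../images/cover.jpg'          # hypothetical manifest href

    # href_to_name: join the href onto the OPF directory (no normalisation is done)
    name = posixpath.join(opf_dir, href)  # 'OEBPS/../images/cover.jpg'

    # name_to_href: express a name relative to the OPF directory
    back = posixpath.relpath('images/cover.jpg', opf_dir)   # '../images/cover.jpg'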

@@ -0,0 +1,82 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
from calibre.utils.date import parse_date, strptime
class Epubcheck(ePubFixer):
name = 'Workaround epubcheck bugs'
@property
def short_description(self):
return _('Workaround epubcheck bugs')
@property
def long_description(self):
return _('Workarounds for bugs in the latest release of epubcheck. '
'epubcheck reports many things as errors that are not '
'actually errors. %prog will try to detect these and replace '
'them with constructs that epubcheck likes. This may cause '
'significant changes to your epub, complain to the epubcheck '
'project.')
@property
def fix_name(self):
return 'epubcheck'
def fix_pubdates(self):
dirtied = False
opf = self.container.opf
for dcdate in opf.xpath('//dc:date',
namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
raw = dcdate.text
if not raw: raw = ''
default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
try:
ts = parse_date(raw, assume_utc=False, as_utc=True,
default=default)
except:
raise InvalidEpub('Invalid date set in OPF', raw)
sval = ts.strftime('%Y-%m-%d')
if sval != raw:
self.log.error(
'OPF contains date', raw, 'that epubcheck does not like')
if self.fix:
dcdate.text = sval
self.log('\tReplaced', raw, 'with', sval)
dirtied = True
if dirtied:
self.container.set(self.container.opf_name, opf)
def fix_preserve_aspect_ratio(self):
for name in self.container.name_map:
mt = self.container.mime_map.get(name, '')
if mt.lower() == 'application/xhtml+xml':
root = self.container.get(name)
dirtied = False
for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
namespaces={'svg':'http://www.w3.org/2000/svg'}):
self.log.error('Found <svg> element with'
' preserveAspectRatio="none" which epubcheck '
'cannot handle')
if self.fix:
svg.set('preserveAspectRatio', 'xMidYMid meet')
dirtied = True
self.log('\tReplaced none with xMidYMid meet')
if dirtied:
self.container.set(name, root)
def run(self, container, opts, log, fix=False):
self.container = container
self.opts = opts
self.log = log
self.fix = fix
self.fix_pubdates()
self.fix_preserve_aspect_ratio()
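fix_pubdates above normalises every dc:date to the plain %Y-%m-%d form that epubcheck accepts. A standard-library approximation of that normalisation (illustrative; calibre itself uses parse_date with a default of 2000-01-01):

    from datetime import datetime

    def normalised_pubdate(raw, default=datetime(2000, 1, 1)):
        # Parse a few common date forms and re-emit as YYYY-MM-DD, falling
        # back to the default when the field is empty or unparseable.
        raw = (raw or '').strip()
        for fmt in ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d', '%d %b %Y'):
            try:
                return datetime.strptime(raw, fmt).strftime('%Y-%m-%d')
            except ValueError:
                pass
        return default.strftime('%Y-%m-%d')

    print(normalised_pubdate('2009-03-04T12:00:00'))   # 2009-03-04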

@@ -0,0 +1,56 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os
from calibre.utils.config import OptionParser
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.zipfile import ZipFile
from calibre.utils.logging import default_log
from calibre.customize.ui import epub_fixers
from calibre.ebooks.epub.fix.container import Container
def option_parser():
parser = OptionParser(usage=_(
'%prog [options] file.epub\n\n'
'Fix common problems in EPUB files that can cause them '
'to be rejected by poorly designed publishing services.\n\n'
'By default, no fixing is done and messages are printed out '
'for each error detected. Use the options to control which errors '
'are automatically fixed.'))
for fixer in epub_fixers():
fixer.add_options_to_parser(parser)
return parser
def run(epub, opts, log):
with TemporaryDirectory('_epub-fix') as tdir:
with CurrentDir(tdir):
zf = ZipFile(epub)
zf.extractall()
zf.close()
container = Container(tdir, log)
for fixer in epub_fixers():
fix = getattr(opts, fixer.fix_name, False)
fixer.run(container, opts, log, fix=fix)
container.write(epub)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
default_log.error(_('You must specify an epub file'))
return
epub = os.path.abspath(args[1])
run(epub, opts, default_log)
if __name__ == '__main__':
main()
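For reference, the same pipeline that main() drives can be invoked directly; a minimal sketch using the functions defined in this file (the epub file name and the chosen switch are hypothetical, and the snippet assumes it runs in this module's namespace):

    import os
    from calibre.utils.logging import default_log

    # Equivalent to running: epub-fix --unmanifested my_book.epub
    opts, args = option_parser().parse_args(['epub-fix', '--unmanifested', 'my_book.epub'])
    run(os.path.abspath(args[1]), opts, default_log)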

@@ -0,0 +1,49 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer
class Unmanifested(ePubFixer):
name = 'Fix unmanifested files'
@property
def short_description(self):
return _('Fix unmanifested files')
@property
def long_description(self):
return _('Fix unmanifested files. %prog can either add them to '
'the manifest or delete them as specified by the '
'delete unmanifested option.')
@property
def fix_name(self):
return 'unmanifested'
@property
def options(self):
return [('delete_unmanifested', 'bool', False,
_('Delete unmanifested files instead of adding them to the manifest'))]
def run(self, container, opts, log, fix=False):
dirtied = False
for name in list(container.manifest_worthy_names()):
item = container.manifest_item_for_name(name)
if item is None:
log.error(name, 'not in manifest')
if fix:
if opts.delete_unmanifested:
container.delete_name(name)
log('\tDeleted')
else:
container.add_name_to_manifest(name)
log('\tAdded to manifest')
dirtied = True
if dirtied:
container.set(container.opf_name, container.opf)

@@ -84,7 +84,7 @@ class EPUBOutput(OutputFormatPlugin):
         OptionRecommendation(name='no_svg_cover', recommended_value=False,
             help=_('Do not use SVG for the book cover. Use this option if '
-                'your EPUB is going to be used ona device that does not '
+                'your EPUB is going to be used on a device that does not '
                 'support SVG, like the iPhone or the JetBook Lite. '
                 'Without this option, such devices will display the cover '
                 'as a blank page.')
@@ -380,10 +380,9 @@ class EPUBOutput(OutputFormatPlugin):
                 sel = '.'+lb.get('class')
                 for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                     if sel == rule.selectorList.selectorText:
-                        val = rule.style.removeProperty('margin-left')
-                        pval = rule.style.getProperty('padding-left')
-                        if val and not pval:
-                            rule.style.setProperty('padding-left', val)
+                        rule.style.removeProperty('margin-left')
+                        # padding-left breaks rendering in webkit and gecko
+                        rule.style.removeProperty('padding-left')
     # }}}

@@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isfreebsd
+from calibre.constants import islinux, isfreebsd, iswindows
 from calibre import unicode_path
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@@ -32,9 +32,14 @@ class Link(object):

     @classmethod
     def url_to_local_path(cls, url, base):
-        path = urlunparse(('', '', url.path, url.params, url.query, ''))
+        path = url.path
+        isabs = False
+        if iswindows and path.startswith('/'):
+            path = path[1:]
+            isabs = True
+        path = urlunparse(('', '', path, url.params, url.query, ''))
         path = unquote(path)
-        if os.path.isabs(path):
+        if isabs or os.path.isabs(path):
             return path
         return os.path.abspath(os.path.join(base, path))

@@ -307,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
                 xpath
         from calibre import guess_type
         import cssutils
+        self.OEB_STYLES = OEB_STYLES
         oeb = create_oebbook(log, None, opts, self,
                 encoding=opts.input_encoding, populate=False)
         self.oeb = oeb
@@ -323,7 +329,7 @@ class HTMLInput(InputFormatPlugin):
             metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
         if not metadata.language:
             oeb.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
+            metadata.add('language', get_lang().replace('_', '-'))
         if not metadata.creator:
             oeb.logger.warn('Creator not specified')
             metadata.add('creator', self.oeb.translate(__('Unknown')))
@@ -371,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
             rewrite_links(item.data, partial(self.resource_adder, base=dpath))

         for item in oeb.manifest.values():
-            if item.media_type in OEB_STYLES:
+            if item.media_type in self.OEB_STYLES:
                 dpath = None
                 for path, href in self.added_resources.items():
                     if href == item.href:
@@ -409,12 +415,30 @@ class HTMLInput(InputFormatPlugin):
             oeb.container = DirContainer(os.getcwdu(), oeb.log)
         return oeb

+    def link_to_local_path(self, link_, base=None):
+        if not isinstance(link_, unicode):
+            try:
+                link_ = link_.decode('utf-8', 'error')
+            except:
+                self.log.warn('Failed to decode link %r. Ignoring'%link_)
+                return None, None
+        try:
+            l = Link(link_, base if base else os.getcwdu())
+        except:
+            self.log.exception('Failed to process link: %r'%link_)
+            return None, None
+        if l.path is None:
+            # Not a local resource
+            return None, None
+        link = l.path.replace('/', os.sep).strip()
+        frag = l.fragment
+        if not link:
+            return None, None
+        return link, frag
+
     def resource_adder(self, link_, base=None):
-        link = self.urlnormalize(link_)
-        link, frag = self.urldefrag(link)
-        link = unquote(link).replace('/', os.sep)
-        if not link.strip():
+        link, frag = self.link_to_local_path(link_, base=base)
+        if link is None:
             return link_
         try:
             if base and not os.path.isabs(link):
@@ -442,6 +466,9 @@ class HTMLInput(InputFormatPlugin):
             item = self.oeb.manifest.add(id, href, media_type)
             item.html_input_href = bhref
+            if guessed in self.OEB_STYLES:
+                item.override_css_fetch = partial(
+                        self.css_import_handler, os.path.dirname(link))
             item.data
         self.added_resources[link] = href

@@ -450,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
             nlink = '#'.join((nlink, frag))
         return nlink

+    def css_import_handler(self, base, href):
+        link, frag = self.link_to_local_path(href, base=base)
+        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
+            return (None, None)
+        try:
+            raw = open(link, 'rb').read().decode('utf-8', 'replace')
+            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
+        except:
+            self.log.exception('Failed to read CSS file: %r'%link)
+            return (None, None)
+        return (None, raw)

@@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
         xml = d.to_xml(write_files=True)
         if options.verbose > 2:
             open('lrs.xml', 'wb').write(xml.encode('utf-8'))
-        parser = etree.XMLParser(recover=True, no_network=True)
+        parser = etree.XMLParser(no_network=True, huge_tree=True)
         doc = etree.fromstring(xml, parser=parser)
         char_button_map = {}
         for x in doc.xpath('//CharButton[@refobj]'):

@@ -870,7 +870,7 @@ class Text(LRFStream):
         open_containers = collections.deque()
         for c in self.content:
             if isinstance(c, basestring):
-                s += prepare_string_for_xml(c)
+                s += prepare_string_for_xml(c).replace('\0', '')
             elif c is None:
                 if open_containers:
                     p = open_containers.pop()
