Sync to trunk.

John Schember 2010-07-13 14:05:53 -04:00
commit b1eb8f5d58
211 changed files with 57578 additions and 51984 deletions


@@ -4,6 +4,255 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.8
date: 2010-07-09
new features:
- title: "New tool to help prepare EPUBs for publication"
type: major
description: >
"calibre now contains a new command line tool called epub-fix that can automatically fix
common problems in EPUB files that cause them to be rejected by poorly designed publishing services.
The tool is plugin based for extensible functionality in the future. Currently, it can fix unmanifested files
and workaround the date and svg preserveaspectratio bugs of epubcheck."
- title: "New icons for the toolbar buttons by Kamil Tatara"
- title: "Display rating (when available) in cover browser"
- title: "Clicking on the central cover int the cover browser now opens that book in the viewer"
- title: "Use the status bar instead of the area to the right of the location view to display status information"
- title: "Driver for the Pandigital Novel e-book reader"
bug fixes:
- title: "News download: Don not specify a font family for article descriptions"
- title: "News download: Fix regression introduced in 0.7.0 that broke download of some embedded content feeds"
- title: "MOBI Output: Partial support for nested superscript and subscripts."
tickets: [6132]
- title: "CHM Input: Fix handling of buggy CHM files with no .hhc"
tickets: [6087]
- title: "EPUB Input: Fix bug in unzipping EPUB files that have been zipped in depth first order."
tickets: [6127]
- title: "TXT Input: Convert HTML entities to characters."
tickets: [6114]
- title: "LRF Input: Handle LRF files with random null bytes in the text"
tickets: [6097]
- title: "Kobo driver: Fix detection of txt/html files on the device"
- title: "Fix opening of books when calibre library is on an unmapped network share in windows"
- title: "SONY driver: Only update the timestamp in the XML db for newly added books"
- title: "Cover browser: Fix rendering of center cover when width of cover browser is less than the width of a single cover"
- title: "Cover browser: Correct fix for setPixel out of bounds warning causing UI slowdown in calibre"
new recipes:
- title: "evz.ro"
author: Darko Miletic
- title: "Anchorage Daily News, China Economic Net, BBC Chinese and Singtao Daily"
author: rty
- title: Big Oven
author: Starson17
improved recipes:
- Haaretz
- Editor and Publisher
- Estadao
- version: 0.7.7
date: 2010-07-02
new features:
- title: "Support for the Nokia E52"
- title: "Searching on the size column"
- title: "iTunes driver: Add option to disable cover fetching for speeding up the fetching of large book collections"
bug fixes:
- title: "SONY driver: Only update metadata when books are sent to device."
- title: "TXT Input: Ensure the generated html is splittable"
tickets: [5904]
- title: "Fix infinite loop in default cover generation."
tickets: [6061]
- title: "HTML Input: Fix a parsing bug that was triggered in rare conditions"
tickets: [6064]
- title: "HTML2Zip plugin: Do not replace ligatures"
tickets: [6019]
- title: "iTunes driver: Fix transmission of non integral series numbers"
tickets: [6046]
- title: "Simplify implementation of cover caching and ensure cover browser is updated when covers are changed"
- title: "PDF metadata: Fix last character corrupted when setting metadata in encrypted files."
- title: "PDF metadata: Update the version of PoDoFo used to set metadata to 0.8.1. Hopefully that means more PDF files will work"
- title: "Device drivers: Speedup for dumping metadata cache to devices on Windows XP"
- title: "EPUB Output: Ensure that language setting is conformant to the specs"
- title: "MOBI Output: Fix a memory leak and a crash in the palmdoc compression routine"
- title: "Metadata download: Fix a regression that resulted in a failed download for some books"
new recipes:
- title: "Foreign Policy and Alo!"
author: Darko Miletic
- title: Statesman and infzm
author: rty
improved recipes:
- Akter
- The Old New Thing
- version: 0.7.6
date: 2010-06-28
new features:
- title: "Add support for the new firmware of the Azbooka"
tickets: [5994]
- title: "A few speedups for calibre startup, should add up to a few seconds of startup time on slower machines"
- title: "Support for the Sweem MM300"
- title: "Add keyboard shorcut for Download metadata and covers"
bug fixes:
- title: "Fix regression in 0.7.5 that broke conversion of malformed HTML files (like those Microsoft Word outputs)"
type: major
tickets: [5991]
- title: "Don't download tags from librarything, as the tagging there is not very good"
- title: "Add mimetype for FB2 so that it can be served by the content server"
tickets: [6011]
- title: "Ensure cover is not resized to less than the available space in the Edit Meta Information dialog"
tickets: [6001]
- title: "SONY driver: Only update collections when sending book to device for the first time"
- title: "calibre should now work on windows when the location for the library contains non-ascii characters"
tickets: [5983]
- title: "Cover browser once again distorts instead of cropping covers that have an incorrect aspect ratio"
- title: "ISBNDb metadata plugin: Fix bug causing only first page of results to be fetched"
- title: "Move iTunes driver to the bottom so that it doesn't interfere with device detection for people that have iphones and an ereader plugged in"
improved recipes:
- Houston Chronicle
- Hindu
- Times of India
- New York Times
new recipes:
- title: Winnipeg Sun
author: rty
- version: 0.7.5
date: 2010-06-25
new features:
- title: "New driver for the Kobo featuring closer integration with the device."
- title: "Support for the Dell Streak, Eken Android tablet and the Astak Mentor EB600"
- title: "New series type custom column"
- title: "Add option in Send to device menu to connect to iTunes without any iDevice (experimental)"
- title: "iPad driver: Make setting iTunes Category from series optional. News download now optimizations for iPad output."
- title: "Add option to disable book cover animation"
tickets: [5909]
- title: "Edit meta information dialog: Remember last used size and splitter position."
tickets: [5908]
- title: "Metadata download: If any results have a published date, ensure they all do"
- title: "SONY driver: Add a preference setting in Preferences->Add/Save->Send to device to control how colelctions are managed on the device by calibre"
- title: "Metadata download: Filter out non book results. Also sort results by availability of covers for the isbn"
tickets: [5946]
- title: "Bulk editing for device collections in the device view via the context menu"
bug fixes:
- title: "When converting books using the calibre GUI, set the language of the output book to be the same as the language of the User Interface, instead of undefined. Fixes use of dictionary in iBooks"
- title: "PDF Output: Fix setting top/bottom margnis has no effect"
- title: "Conversion pipeline: Fix typo causing remove footer regex to always fail"
- title: "Handle device being yanked with queued device jobs gracefully"
- title: "Conversion pipeline: Handle deeply nested XML structures"
tickets: [5931]
- title: "Conversion pipeline: Fix handling of lists with a specified left margin"
tickets: [5877]
- title: "Restore workaround for ADE buggy rendering of anchors as links. However, make it overridable by extra CSS"
- title: "Fix LibraryThing metadata download plugin"
- title: "Fix multiple ratings displayed in Tag Browser for some legacy databases"
- title: "Fix invocation of postprocess file type plugins plugins"
- title: "HTML Input: Handle @import directives in linked css files."
tickets: [5135]
- title: "HTML Input: Handle absolute paths in resource links on windows correctly."
tickets: [3031]
- title: "E-book viewer: Handle font-face rules specify multiple families to be substituted"
- title: "Cover browser: Set aspect ratio of covers to 3:4 instead of 2:3. Crop rather than distort covers whoose aspect ratio is different from this. Antialias the rendering of the central cover"
- title: "Reset Tag browser if the text in the search box is edited"
- title: "Fix detection of SD card in Samsung Galaxy windows driver"
new recipes:
- title: "L'Osservatore Romano"
author: Darko Miletic
- title: China Press, London Free Press, People Daily
author: rty
improved recipes:
- Zaobao
- New Scientist
- National Post
- London review of books
- version: 0.7.4
date: 2010-06-19
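
The 0.7.8 entry above introduces the epub-fix command line tool. A minimal sketch of driving it from Python, assuming only what the entry states: an executable named epub-fix that repairs an EPUB given its path (any further flags would be assumptions and are not used here).

import subprocess

# Run the new epub-fix tool on one book (hypothetical invocation based on the
# changelog description; epub-fix is assumed to be on PATH and to accept the
# EPUB path as its argument).
subprocess.check_call(['epub-fix', 'mybook.epub'])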


@@ -1752,7 +1752,7 @@
sodipodi:cy="93.331604"
sodipodi:cx="-166.53223"
id="path6082"
style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
sodipodi:type="arc" /></clipPath><radialGradient
inkscape:collect="always"
xlink:href="#linearGradient5990"
@@ -2513,7 +2513,7 @@
transform="matrix(-1.7332269,0,0,1.7332269,-228.13814,-101.76485)"
clip-path="none" /><path
sodipodi:type="arc"
style="opacity:1;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
style="opacity:1;fill:url(#radialGradient6084);fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:1.08779998;stroke-opacity:1;filter:url(#filter6074)"
id="path3915"
sodipodi:cx="-166.53223"
sodipodi:cy="93.331604"
@@ -2901,22 +2901,8 @@
id="g133">
<defs
id="defs135" />
<use
id="use138"
x="0"
y="0"
width="121"
height="120" />
<clipPath
id="XMLID_215_">
<use
id="use141"
x="0"
y="0"
width="121"
height="120" />
</clipPath>
<g
clip-path="url(#XMLID_215_)"


@@ -0,0 +1,269 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 12.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 51448) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
version="1.0"
id="Livello_1"
width="128"
height="128"
viewBox="0 0 139 139"
overflow="visible"
enable-background="new 0 0 139 139"
xml:space="preserve"
sodipodi:version="0.32"
inkscape:version="0.45+devel"
sodipodi:docname="system-help.svgz"
inkscape:output_extension="org.inkscape.output.svgz.inkscape"
style="overflow:visible"><metadata
id="metadata3164"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /></cc:Work></rdf:RDF></metadata><defs
id="defs3162"><filter
inkscape:collect="always"
x="-0.132641"
width="1.265282"
y="-0.34752154"
height="1.6950431"
id="filter3547"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.7512044"
id="feGaussianBlur3549" /></filter><filter
inkscape:collect="always"
id="filter5097"><feGaussianBlur
inkscape:collect="always"
stdDeviation="2.32"
id="feGaussianBlur5099" /></filter><filter
inkscape:collect="always"
x="-0.143268"
width="1.286536"
y="-0.072184406"
height="1.1443688"
id="filter5125"><feGaussianBlur
inkscape:collect="always"
stdDeviation="1.91024"
id="feGaussianBlur5127" /></filter></defs><sodipodi:namedview
inkscape:window-height="697"
inkscape:window-width="1024"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
guidetolerance="10.0"
gridtolerance="10.0"
objecttolerance="10.0"
borderopacity="1.0"
bordercolor="#666666"
pagecolor="#ffffff"
id="base"
inkscape:zoom="2.9352518"
inkscape:cx="99.496726"
inkscape:cy="69.329657"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:current-layer="Livello_1"
height="128px"
width="128px" />
<filter
id="AI_Sfocatura_4">
<feGaussianBlur
stdDeviation="4"
id="feGaussianBlur3096" />
</filter>
<filter
id="AI_Sfocatura_2">
<feGaussianBlur
stdDeviation="2"
id="feGaussianBlur3099" />
</filter>
<radialGradient
id="XMLID_12_"
cx="69.600098"
cy="69.576698"
r="58"
gradientTransform="matrix(1,0,0,-0.1823,0,134.8566)"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
style="stop-color:#000000"
id="stop3102" />
<stop
offset="1"
style="stop-color:#000000;stop-opacity:0;"
id="stop3104" />
</radialGradient>
<circle
sodipodi:ry="58"
sodipodi:rx="58"
sodipodi:cy="69.599998"
sodipodi:cx="69.599998"
style="opacity:0.7;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter5097)"
id="circle5091"
r="58"
cy="69.599998"
cx="69.599998"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" /><ellipse
cx="69.599998"
cy="122.173"
rx="58"
ry="10.573"
id="ellipse3106"
style="opacity:0.6;fill:url(#XMLID_12_)"
sodipodi:cx="69.599998"
sodipodi:cy="122.173"
sodipodi:rx="58"
sodipodi:ry="10.573"
transform="translate(-9.9998474e-2,1.9102535)" />
<radialGradient
id="XMLID_13_"
cx="69.600098"
cy="69.600098"
r="58"
gradientUnits="userSpaceOnUse">
<stop
offset="0.6154"
style="stop-color:#EEEEEE"
id="stop3113" />
<stop
offset="0.8225"
style="stop-color:#DDDDDD"
id="stop3115" />
<stop
offset="1"
style="stop-color:#FFFFFF"
id="stop3117" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
id="circle3119"
style="fill:url(#XMLID_13_)"
sodipodi:cx="69.599998"
sodipodi:cy="69.599998"
sodipodi:rx="58"
sodipodi:ry="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)" />
<linearGradient
id="XMLID_14_"
gradientUnits="userSpaceOnUse"
x1="27.6001"
y1="69.600098"
x2="111.6001"
y2="69.600098"
gradientTransform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<stop
offset="0"
style="stop-color:#2A94EC"
id="stop3122" />
<stop
offset="1"
style="stop-color:#0057AE"
id="stop3124" />
</linearGradient>
<path
d="M 26.062502,67.328127 C 26.062502,92.477355 46.522651,112.9375 71.671877,112.9375 C 96.821104,112.9375 117.28125,92.477355 117.28125,67.328127 C 117.28125,42.178901 96.821104,21.718753 71.671877,21.718753 C 46.522651,21.718753 26.062502,42.178901 26.062502,67.328127 z"
id="path3126"
style="fill:url(#XMLID_14_)" />
<g
id="circle22111"
cy="92"
rx="36"
ry="36"
cx="343.99899"
enable-background="new "
style="opacity:0.3;filter:url(#filter3547)"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<path
d="M 77.041,104.759 C 63.767,106.115 50.122,103.11 46.565,98.042 C 43.007,92.976 50.885,87.768 64.16,86.41 C 77.434,85.054 91.079,88.058 94.637,93.126 C 98.193,98.194 90.315,103.401 77.041,104.759 z"
id="path3129"
style="fill:#a8dde0" />
</g>
<linearGradient
id="circle16776_1_"
gradientUnits="userSpaceOnUse"
x1="135.5601"
y1="417.66461"
x2="161.87621"
y2="417.66461"
gradientTransform="matrix(0,1.7280523,1.7280523,0,-650.07477,-218.71693)">
<stop
offset="0"
style="stop-color:#FFFFFF"
id="stop3132" />
<stop
offset="1"
style="stop-color:#ffffff;stop-opacity:0;"
id="stop3134" />
</linearGradient>
<path
id="circle16776"
enable-background="new "
d="M 71.671877,24.06655 C 50.288682,24.06655 32.41958,38.77123 28.113838,58.349597 C 36.698174,66.142284 52.986151,54.358777 71.671877,54.358777 C 90.357604,54.358777 106.64666,66.142284 115.22991,58.349597 C 110.92417,38.77123 93.056158,24.06655 71.671877,24.06655 z"
style="opacity:0.8;fill:url(#circle16776_1_)" />
<g
id="g3137"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<defs
id="defs3139"><path
id="XMLID_10_"
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z" /></defs>
<clipPath
id="XMLID_6_">
<use
xlink:href="#XMLID_10_"
id="use3143"
x="0"
y="0"
width="139"
height="139" />
</clipPath>
<g
clip-path="url(#XMLID_6_)"
id="g3145"
style="filter:url(#AI_Sfocatura_2)">
<path
d="M 27.6,69.6 C 27.6,92.759 46.441,111.6 69.6,111.6 C 92.759,111.6 111.6,92.759 111.6,69.6 C 111.6,46.441 92.759,27.6 69.6,27.6 C 46.441,27.6 27.6,46.441 27.6,69.6 z"
id="path3147"
style="fill:none;stroke:#00316e;stroke-width:2" />
</g>
</g>
<g
transform="matrix(1.0859375,0,0,1.1113796,-3.201342,-9.3177223)"
id="g5119"
style="fill:#00316e;filter:url(#filter5125)"><path
style="fill:#00316e"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
id="path5121" /><circle
style="fill:#00316e"
sodipodi:ry="8"
sodipodi:rx="8"
sodipodi:cy="93.599998"
sodipodi:cx="69.599998"
cx="69.599998"
cy="93.599998"
r="8"
id="circle5123" /></g><g
id="g5101"
transform="matrix(1.0859375,0,0,1.0859375,-3.201342,-8.2531233)"><path
id="path3157"
d="M 63.37,80.089 L 63.192,77.746 C 63.012,73.148 64.44,68.462 68.451,63.684 C 71.304,60.26 73.62,57.286 73.62,54.221 C 73.62,51.157 71.571,48.994 67.202,48.903 C 64.173,48.903 60.696,49.895 58.289,51.517 L 55.348,41.784 C 58.556,39.89 63.815,38.088 70.233,38.088 C 81.91,38.088 87.348,44.668 87.348,52.058 C 87.348,58.997 83.069,63.415 79.681,67.289 C 76.472,70.894 75.046,74.41 75.135,78.466 L 75.135,80.088 L 63.37,80.088 L 63.37,80.089 z"
style="fill:#ffffff" /><circle
id="circle3159"
r="8"
cy="93.599998"
cx="69.599998"
sodipodi:cx="69.599998"
sodipodi:cy="93.599998"
sodipodi:rx="8"
sodipodi:ry="8"
style="fill:#ffffff" /></g>
</svg>

resources/images/help.svg (new file, 203 lines)

@@ -0,0 +1,203 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
version="1.0"
width="128"
height="128"
viewBox="0 0 139 139"
id="Livello_1"
xml:space="preserve"
style="overflow:visible"><defs
id="defs3162"><filter
x="-0.132641"
y="-0.34752154"
width="1.265282"
height="1.6950431"
color-interpolation-filters="sRGB"
id="filter3547"><feGaussianBlur
id="feGaussianBlur3549"
stdDeviation="2.7512044" /></filter><filter
color-interpolation-filters="sRGB"
id="filter5097"><feGaussianBlur
id="feGaussianBlur5099"
stdDeviation="2.32" /></filter><filter
x="-0.143268"
y="-0.072184406"
width="1.286536"
height="1.1443688"
color-interpolation-filters="sRGB"
id="filter5125"><feGaussianBlur
id="feGaussianBlur5127"
stdDeviation="1.91024" /></filter></defs>
<filter
color-interpolation-filters="sRGB"
id="AI_Sfocatura_4">
<feGaussianBlur
id="feGaussianBlur3096"
stdDeviation="4" />
</filter>
<filter
color-interpolation-filters="sRGB"
id="AI_Sfocatura_2">
<feGaussianBlur
id="feGaussianBlur3099"
stdDeviation="2" />
</filter>
<radialGradient
cx="69.600098"
cy="69.576698"
r="58"
id="XMLID_12_"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1,0,0,-0.1823,0,134.8566)">
<stop
id="stop3102"
style="stop-color:#000000;stop-opacity:1"
offset="0" />
<stop
id="stop3104"
style="stop-color:#000000;stop-opacity:0"
offset="1" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="circle5091"
style="opacity:0.7;fill:#000000;fill-opacity:1;stroke:none;filter:url(#filter5097)" />
<radialGradient
cx="69.600098"
cy="69.600098"
r="58"
id="XMLID_13_"
gradientUnits="userSpaceOnUse">
<stop
id="stop3113"
style="stop-color:#eeeeee;stop-opacity:1"
offset="0.61540002" />
<stop
id="stop3115"
style="stop-color:#dddddd;stop-opacity:1"
offset="0.82249999" />
<stop
id="stop3117"
style="stop-color:#ffffff;stop-opacity:1"
offset="1" />
</radialGradient>
<circle
cx="69.599998"
cy="69.599998"
r="58"
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="circle3119"
style="fill:url(#XMLID_13_)" />
<linearGradient
x1="27.6001"
y1="69.600098"
x2="111.6001"
y2="69.600098"
id="XMLID_14_"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)">
<stop
id="stop3122"
style="stop-color:#2a94ec;stop-opacity:1"
offset="0" />
<stop
id="stop3124"
style="stop-color:#0057ae;stop-opacity:1"
offset="1" />
</linearGradient>
<path
d="m 26.062502,67.328127 c 0,25.149228 20.460149,45.609373 45.609375,45.609373 25.149227,0 45.609373,-20.460145 45.609373,-45.609373 0,-25.149226 -20.460146,-45.609374 -45.609373,-45.609374 -25.149226,0 -45.609375,20.460148 -45.609375,45.609374 z"
id="path3126"
style="fill:url(#XMLID_14_)" />
<g
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="circle22111"
style="opacity:0.3;filter:url(#filter3547)">
<path
d="M 77.041,104.759 C 63.767,106.115 50.122,103.11 46.565,98.042 43.007,92.976 50.885,87.768 64.16,86.41 c 13.274,-1.356 26.919,1.648 30.477,6.716 3.556,5.068 -4.322,10.275 -17.596,11.633 z"
id="path3129"
style="fill:#a8dde0" />
</g>
<linearGradient
x1="135.5601"
y1="417.66461"
x2="161.87621"
y2="417.66461"
id="circle16776_1_"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0,1.7280523,1.7280523,0,-650.07477,-218.71693)">
<stop
id="stop3132"
style="stop-color:#ffffff;stop-opacity:1"
offset="0" />
<stop
id="stop3134"
style="stop-color:#ffffff;stop-opacity:0"
offset="1" />
</linearGradient>
<path
d="m 71.671877,24.06655 c -21.383195,0 -39.252297,14.70468 -43.558039,34.283047 8.584336,7.792687 24.872313,-3.99082 43.558039,-3.99082 18.685727,0 34.974783,11.783507 43.558033,3.99082 C 110.92417,38.77123 93.056158,24.06655 71.671877,24.06655 z"
id="circle16776"
style="opacity:0.8;fill:url(#circle16776_1_)" />
<g
transform="matrix(1.0859375,0,0,1.0859375,-3.9093733,-8.2531233)"
id="g3137">
<defs
id="defs3139"><path
d="m 27.6,69.6 c 0,23.159 18.841,42 42,42 23.159,0 42,-18.841 42,-42 0,-23.159 -18.841,-42 -42,-42 -23.159,0 -42,18.841 -42,42 z"
id="XMLID_10_" /></defs>
<clipPath
id="XMLID_6_">
<use
id="use3143"
x="0"
y="0"
width="139"
height="139"
xlink:href="#XMLID_10_" />
</clipPath>
<g
clip-path="url(#XMLID_6_)"
id="g3145"
style="filter:url(#AI_Sfocatura_2)">
<path
d="m 27.6,69.6 c 0,23.159 18.841,42 42,42 23.159,0 42,-18.841 42,-42 0,-23.159 -18.841,-42 -42,-42 -23.159,0 -42,18.841 -42,42 z"
id="path3147"
style="fill:none;stroke:#00316e;stroke-width:2" />
</g>
</g>
<g
transform="matrix(1.0859375,0,0,1.1113796,-3.201342,-9.3177223)"
id="g5119"
style="fill:#00316e;filter:url(#filter5125)"><path
d="m 63.37,80.089 -0.178,-2.343 c -0.18,-4.598 1.248,-9.284 5.259,-14.062 2.853,-3.424 5.169,-6.398 5.169,-9.463 0,-3.064 -2.049,-5.227 -6.418,-5.318 -3.029,0 -6.506,0.992 -8.913,2.614 l -2.941,-9.733 c 3.208,-1.894 8.467,-3.696 14.885,-3.696 11.677,0 17.115,6.58 17.115,13.97 0,6.939 -4.279,11.357 -7.667,15.231 -3.209,3.605 -4.635,7.121 -4.546,11.177 l 0,1.622 -11.765,0 0,0.001 z"
id="path5121"
style="fill:#00316e" /><circle
cx="69.599998"
cy="93.599998"
r="8"
id="circle5123"
style="fill:#00316e" /></g><g
transform="matrix(1.0859375,0,0,1.0859375,-3.201342,-8.2531233)"
id="g5101"><path
d="m 63.37,80.089 -0.178,-2.343 c -0.18,-4.598 1.248,-9.284 5.259,-14.062 2.853,-3.424 5.169,-6.398 5.169,-9.463 0,-3.064 -2.049,-5.227 -6.418,-5.318 -3.029,0 -6.506,0.992 -8.913,2.614 l -2.941,-9.733 c 3.208,-1.894 8.467,-3.696 14.885,-3.696 11.677,0 17.115,6.58 17.115,13.97 0,6.939 -4.279,11.357 -7.667,15.231 -3.209,3.605 -4.635,7.121 -4.546,11.177 l 0,1.622 -11.765,0 0,0.001 z"
id="path3157"
style="fill:#ffffff" /><circle
cx="69.599998"
cy="93.599998"
r="8"
id="circle3159"
style="fill:#ffffff" /></g>
</svg>


@@ -15,7 +15,7 @@ class Akter(BasicNewsRecipe):
category = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets = False
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
@@ -23,9 +23,9 @@ class Akter(BasicNewsRecipe):
publication_type = 'magazine'
remove_empty_feeds = True
PREFIX = 'http://www.akter.co.rs'
extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body,.lokacija{font-family: Arial,Helvetica,sans1,sans-serif}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
.color-2{display:block; margin-bottom: 10px; padding: 5px, 10px;
border-left: 1px solid #D00000; color: #D00000}
img{margin-bottom: 0.8em} """


@@ -0,0 +1,65 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.alo.rs
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Alo_Novine(BasicNewsRecipe):
title = 'Alo!'
__author__ = 'Darko Miletic'
description = "News Portal from Serbia"
publisher = 'Alo novine d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
delay = 4
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'sr'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
.lead {font-size: 1.3em}
h1{color: #DB0700}
.article_uvod{font-style: italic; font-size: 1.2em}
img{margin-bottom: 0.8em} """
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher': publisher
, 'language' : language
}
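# Replace capital Đ (U+0110) with the visually similar Ð (U+00D0) throughout;
# presumably a workaround for reader fonts that lack the Đ glyph.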
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name=['object','link','embed'])]
remove_attributes = ['height','width']
feeds = [
(u'Najnovije Vijesti', u'http://www.alo.rs/rss/danasnje_vesti')
,(u'Politika' , u'http://www.alo.rs/rss/politika')
,(u'Vesti' , u'http://www.alo.rs/rss/vesti')
,(u'Sport' , u'http://www.alo.rs/rss/sport')
,(u'Ljudi' , u'http://www.alo.rs/rss/ljudi')
,(u'Saveti' , u'http://www.alo.rs/rss/saveti')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def print_version(self, url):
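# The numeric article id is the second-to-last segment of the URL path;
# the print URL is built from it.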
artl = url.rpartition('/')[0]
artid = artl.rpartition('/')[2]
return 'http://www.alo.rs/resources/templates/tools/print.php?id=' + artid
def image_url_processor(self, baseurl, url):
return url.replace('alo.rs//','alo.rs/')


@@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
title = u'Anchorage Daily News'
__author__ = 'rty'
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'),
(u'Business', u'http://www.adn.com/money/index.xml'),
(u'Sports', u'http://www.adn.com/sports/index.xml'),
(u'Politics', u'http://www.adn.com/politics/index.xml'),
(u'Lifestyles', u'http://www.adn.com/life/index.xml'),
(u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
]
description = "Alaska's Newspaper"
publisher = 'http://www.adn.com'
category = 'news, Alaska, Anchorage'
language = 'en'
extra_css = '''
p{font-weight: normal;text-align: justify}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'latin-1'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
keep_only_tags = [
dict(name='div', attrs={'class':'left_col story_mainbar'}),
]
remove_tags = [
dict(name='div', attrs={'class':'story_tools'}),
dict(name='p', attrs={'class':'ad_label'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'advertisement'}),
]


@@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
title = u'BBC Chinese'
oldest_article = 7
max_articles_per_feed = 100
feeds = [
(u'\u4e3b\u9875', u'http://www.bbc.co.uk/zhongwen/simp/index.xml'),
(u'\u56fd\u9645\u65b0\u95fb', u'http://www.bbc.co.uk/zhongwen/simp/world/index.xml'),
(u'\u4e24\u5cb8\u4e09\u5730', u'http://www.bbc.co.uk/zhongwen/simp/china/index.xml'),
(u'\u91d1\u878d\u8d22\u7ecf', u'http://www.bbc.co.uk/zhongwen/simp/business/index.xml'),
(u'\u7f51\u4e0a\u4e92\u52a8', u'http://www.bbc.co.uk/zhongwen/simp/interactive/index.xml'),
(u'\u97f3\u89c6\u56fe\u7247', u'http://www.bbc.co.uk/zhongwen/simp/multimedia/index.xml'),
(u'\u5206\u6790\u8bc4\u8bba', u'http://www.bbc.co.uk/zhongwen/simp/indepth/index.xml')
]
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'British Broadcasting Corporation'
description = 'BBC news in Chinese'
category = 'News, Chinese'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://wscdn.bbc.co.uk/zhongwen/simp/images/1024/brand.jpg'
keep_only_tags = [
dict(name='h1'),
dict(name='p', attrs={'class':['primary-topic','summary']}),
dict(name='div', attrs={'class':['bodytext','datestamp']}),
]


@@ -0,0 +1,64 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BigOven(BasicNewsRecipe):
title = 'BigOven'
__author__ = 'Starson17'
description = 'Recipes for the Foodie in us all. Registration is free. A fake username and password just gives smaller photos.'
language = 'en'
category = 'news, food, recipes, gourmet'
publisher = 'Starson17'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.software.com/images/products/BigOven%20Logo_177_216.JPG'
max_articles_per_feed = 30
needs_subscription = True
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
def get_browser(self):
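# Log in to bigoven.com with the credentials supplied in calibre; the form
# name ('form1') and field names are specific to the site's login page.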
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.bigoven.com/')
br.select_form(name='form1')
br['TopMenu_bo1$email'] = self.username
br['TopMenu_bo1$password'] = self.password
br.submit()
return br
remove_attributes = ['style', 'font']
keep_only_tags = [dict(name='h1')
,dict(name='div', attrs={'class':'img'})
,dict(name='div', attrs={'id':'intro'})
]
remove_tags = [dict(name='div', attrs={'style':["overflow: visible;"]})
,dict(name='div', attrs={'class':['ctas']})
#,dict(name='a', attrs={'class':['edit']})
,dict(name='p', attrs={'class':['byline']})
]
feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]
def preprocess_html(self, soup):
for tag in soup.findAll(name='a', attrs={'class':['edit']}):
tag.parent.extract()
for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
tag.replaceWith(tag.string)
return soup
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:medium;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''


@@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278162597(BasicNewsRecipe):
__author__ = 'rty'
title = u'China Economic Net'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'www.ce.cn - China Economic net - Beijing'
description = 'China Economic Net Magazine'
category = 'Economic News Magazine, Chinese, China'
feeds = [
(u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
(u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),
(u'Health 健康', u'http://www.ce.cn/health/index_6294.xml'),
(u'Technology 科技', u'http://sci.ce.cn/mainpage/index_6307.xml'),
(u'Domestic Politics 国内时政', u'http://www.ce.cn/xwzx/gnsz/index_6273.xml')
]
masthead_url = 'http://finance.ce.cn/images/08mdy_logo.gif'
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {margin-right: 8pt; font-family: 'DroidFont', serif;}\n
h1 {font-family: 'DroidFont', serif;}\n
.articledescription {font-family: 'DroidFont', serif;}
'''
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh-cn'
encoding = 'gb2312'
conversion_options = {'linearize_tables':True}
keep_only_tags = [
dict(name='h1', attrs={'id':'articleTitle'}),
dict(name='div', attrs={'class':'laiyuan'}),
dict(name='div', attrs={'id':'articleText'}),
]


@@ -0,0 +1,71 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277228948(BasicNewsRecipe):
title = u'China Press USA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'www.chinapressusa.com'
description = 'Overseas Chinese Network Newspaper in the USA'
category = 'News in Chinese, USA'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
#encoding = 'GB2312'
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url ='http://www.chinapressusa.com/common/images/logo.gif'
extra_css = '''
@font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {
margin-right: 8pt;
font-family: 'DroidFont', serif;}
h1 {font-family: 'DroidFont', serif, sans-serif}
.show {font-family: 'DroidFont', serif, sans-serif}
'''
feeds = [
(u'\u65b0\u95fb\u9891\u9053', u'http://news.uschinapress.com/news.xml'),
(u'\u534e\u4eba\u9891\u9053', u'http://chinese.uschinapress.com/chinese.xml'),
(u'\u8bc4\u8bba\u9891\u9053', u'http://review.uschinapress.com/review.xml'),
]
keep_only_tags = [
dict(name='div', attrs={'class':'show'}),
]
remove_tags = [
# dict(name='table', attrs={'class':'xle'}),
dict(name='div', attrs={'class':'time'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bank17'}),
# dict(name='a', attrs={'class':'ab12'}),
]
def append_page(self, soup, appendtag, position):
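# Recursively follow the 'displaypagenum' pager, splicing each continuation
# page's 'show' div into the article body so multi-page stories download whole.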
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
nexturl = self.INDEX + pager.a['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':'show'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="zh-CN"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
self.append_page(soup, soup.body, 3)
pager = soup.find('div',attrs={'id':'displaypagenum'})
if pager:
pager.extract()
return soup


@@ -1,14 +1,29 @@
import re
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010 elsuave'
from calibre.web.feeds.news import BasicNewsRecipe
class EandP(BasicNewsRecipe):
title = u'Editor and Publisher'
__author__ = u'Xanthan Gum'
__author__ = u'elsuave (modified from Xanthan Gum)'
description = 'News about newspapers and journalism.'
publisher = 'Editor and Publisher'
category = 'news, journalism, industry'
language = 'en'
max_articles_per_feed = 25
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
remove_javascript = True
oldest_article = 7
max_articles_per_feed = 100
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
# Font formatting code borrowed from kwetal
@@ -18,17 +33,21 @@ class EandP(BasicNewsRecipe):
h2{font-size: large;}
'''
# Delete everything before the article
# Keep only div:itemmgap
remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
keep_only_tags = [
dict(name='div', attrs={'class':'itemmgap'})
]
# Delete everything after the article
# Remove commenting/social media links
preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
lambda match: '</body>'),]
remove_tags_after = [dict(name='div', attrs={'class':'clear'})]
feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'),
(u'Business News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'),
(u'Ad/Circ News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'),
(u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'),
(u'Technology News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'),
(u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')]
feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
(u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
(u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
(u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
(u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]


@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.elpais.com/diario/
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class ElPaisImpresa(BasicNewsRecipe):
title = 'El País - edicion impresa'
__author__ = 'Darko Miletic'
description = 'el periodico global en Español'
publisher = 'EDICIONES EL PAIS, S.L.'
category = 'news, politics,Spain,actualidad,noticias,informacion,videos,fotografias,audios,graficos,nacional,internacional,deportes,economia,tecnologia,cultura,gente,television,sociedad,opinion,blogs,foros,chats,encuestas,entrevistas,participacion'
no_stylesheets = True
encoding = 'latin1'
use_embedded_content = False
language = 'es'
publication_type = 'newspaper'
masthead_url = 'http://www.elpais.com/im/tit_logo_global.gif'
index = 'http://www.elpais.com/diario/'
extra_css = ' p{text-align: justify} body{ text-align: left; font-family: Georgia,"Times New Roman",Times,serif } h2{font-family: Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
feeds = [
(u'Internacional' , index + u'internacional/' )
,(u'España' , index + u'espana/' )
,(u'Economia' , index + u'economia/' )
,(u'Opinion' , index + u'opinion/' )
,(u'Viñetas' , index + u'vineta/' )
,(u'Sociedad' , index + u'sociedad/' )
,(u'Cultura' , index + u'cultura/' )
,(u'Tendencias' , index + u'tendencias/' )
,(u'Gente' , index + u'gente/' )
,(u'Obituarios' , index + u'obituarios/' )
,(u'Deportes' , index + u'deportes/' )
,(u'Pantallas' , index + u'radioytv/' )
,(u'Ultima' , index + u'ultima/' )
,(u'Educacion' , index + u'educacion/' )
,(u'Saludo' , index + u'salud/' )
,(u'Ciberpais' , index + u'ciberpais/' )
,(u'EP3' , index + u'ep3/' )
,(u'Cine' , index + u'cine/' )
,(u'Babelia' , index + u'babelia/' )
,(u'El viajero' , index + u'viajero/' )
,(u'Negocios' , index + u'negocios/' )
,(u'Domingo' , index + u'domingo/' )
,(u'El Pais semanal' , index + u'eps/' )
,(u'Quadern Catalunya' , index + u'quadern-catalunya/' )
]
keep_only_tags=[dict(attrs={'class':['cabecera_noticia','contenido_noticia']})]
remove_attributes=['width','height']
remove_tags=[dict(name='link')]
def parse_index(self):
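# Build each section's article list by scraping the index pages listed in
# 'feeds' (they are plain section URLs, not RSS).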
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('a',attrs={'class':['g19r003','g19i003','g17r003','g17i003']}):
url = 'http://www.elpais.com' + item['href'].rpartition('/')[0]
title = self.tag_to_string(item)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':''
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return url + '?print=1'


@@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010, elsuave'
'''
estadao.com.br
'''
@@ -10,12 +10,12 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Estadao(BasicNewsRecipe):
title = 'O Estado de S. Paulo'
__author__ = 'Darko Miletic'
__author__ = 'elsuave (modified from Darko Miletic)'
description = 'News from Brasil in Portuguese'
publisher = 'O Estado de S. Paulo'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 100
max_articles_per_feed = 25
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
@@ -30,13 +30,14 @@ class Estadao(BasicNewsRecipe):
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
keep_only_tags = [
dict(name='div', attrs={'class':['bb-md-noticia','c5']})
]
remove_tags = [
dict(name=['script','object','form','ul'])
,dict(name='div', attrs={'id':['votacao','estadaohoje']})
,dict(name='p', attrs={'id':'ctrl_texto'})
,dict(name='p', attrs={'class':'texto'})
,dict(name='div', attrs={'class':['fnt2 Color_04 bold','right fnt2 innerTop15 dvTmFont','™_01 right outerLeft15','tituloBox','tags']})
,dict(name='div', attrs={'id':['bb-md-noticia-subcom']})
]
feeds = [
@@ -51,13 +52,12 @@ class Estadao(BasicNewsRecipe):
,(u'Vida &', u'http://www.estadao.com.br/rss/vidae.xml')
]
def preprocess_html(self, soup):
ifr = soup.find('iframe')
if ifr:
ifr.extract()
for item in soup.findAll(style=True):
del item['style']
return soup
language = 'pt'
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
if '/Multimidia/' not in url:
return url


@@ -0,0 +1,52 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
evz.ro
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class EVZ_Ro(BasicNewsRecipe):
title = 'evz.ro'
__author__ = 'Darko Miletic'
description = 'News from Romania'
publisher = 'evz.ro'
category = 'news, politics, Romania'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'ro'
masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
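# Strip everything inside <head> except the <title>, so stray scripts and
# styles in the page head never reach the conversion.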
preprocess_regexps = [
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
]
remove_tags = [
dict(name=['form','embed','iframe','object','base','link','script','noscript'])
,dict(attrs={'class':['section','statsInfo','email il']})
,dict(attrs={'id' :'gallery'})
]
remove_tags_after = dict(attrs={'class':'section'})
keep_only_tags = [dict(attrs={'class':'single'})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup


@@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.foreignpolicy.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ForeignPolicy(BasicNewsRecipe):
title = 'Foreign Policy'
__author__ = 'Darko Miletic'
description = 'International News'
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
category = 'news, politics, USA'
oldest_article = 31
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif } img{margin-bottom: 0.4em} h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(attrs={'id':['art-mast','art-body','auth-bio']})]
remove_tags = [dict(name='iframe'),dict(attrs={'id':['share-box','base-ad']})]
remove_attributes = ['height','width']
feeds = [(u'Articles', u'http://www.foreignpolicy.com/node/feed')]
def print_version(self, url):
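# Ask the site for the print-friendly, full-page version of each article.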
return url + '?print=yes&page=full'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup


@@ -1,28 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
haaretz.com
www.haaretz.com
'''
import re
from calibre import strftime
from time import gmtime
from calibre.web.feeds.news import BasicNewsRecipe
class Haaretz_en(BasicNewsRecipe):
title = 'Haaretz in English'
class HaaretzPrint_en(BasicNewsRecipe):
title = 'Haaretz - print edition'
__author__ = 'Darko Miletic'
description = 'Haaretz.com, the online edition of Haaretz Newspaper in Israel, and analysis from Israel and the Middle East. Haaretz.com provides extensive and in-depth coverage of Israel, the Jewish World and the Middle East, including defense, diplomacy, the Arab-Israeli conflict, the peace process, Israeli politics, Jerusalem affairs, international relations, Iran, Iraq, Syria, Lebanon, the Palestinian Authority, the West Bank and the Gaza Strip, the Israeli business world and Jewish life in Israel and the Diaspora. '
publisher = 'haaretz.com'
category = 'news, politics, Israel'
description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
publisher = 'Haaretz'
category = "news, Haaretz, Israel news, Israel newspapers, Israel business news, Israel financial news, Israeli news,Israeli newspaper, Israeli newspapers, news from Israel, news in Israel, news Israel, news on Israel, newspaper Israel, Israel sports news, Israel diplomacy news"
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
encoding = 'utf8'
use_embedded_content = False
language = 'en_IL'
publication_type = 'newspaper'
remove_empty_feeds = True
masthead_url = 'http://www.haaretz.com/images/logos/logoGrey.gif'
PREFIX = 'http://www.haaretz.com'
masthead_url = PREFIX + '/images/logos/logoGrey.gif'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } '
preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')]
conversion_options = {
'comment' : description
, 'tags' : category
@@ -30,27 +35,61 @@ class Haaretz_en(BasicNewsRecipe):
, 'language' : language
}
remove_tags = [dict(name='div', attrs={'class':['rightcol']}),dict(name='table')]
remove_tags_before = dict(name='h1')
remove_tags_after = dict(attrs={'id':'innerArticle'})
keep_only_tags = [dict(attrs={'id':'content'})]
keep_only_tags = [dict(attrs={'id':'threecolumns'})]
remove_attributes = ['width','height']
remove_tags = [
dict(name=['iframe','link','object','embed'])
,dict(name='div',attrs={'class':'rightcol'})
]
feeds = [
(u'Opinion' , u'http://www.haaretz.com/cmlink/opinion-rss-1.209234?localLinksEnabled=false' )
,(u'Defense and diplomacy' , u'http://www.haaretz.com/cmlink/defense-and-diplomacy-rss-1.208894?localLinksEnabled=false')
,(u'National' , u'http://www.haaretz.com/cmlink/national-rss-1.208896?localLinksEnabled=false' )
,(u'International' , u'http://www.haaretz.com/cmlink/international-rss-1.208898?localLinksEnabled=false' )
,(u'Jewish World' , u'http://www.haaretz.com/cmlink/jewish-world-rss-1.209085?localLinksEnabled=false' )
,(u'Business' , u'http://www.haaretz.com/cmlink/business-print-rss-1.264904?localLinksEnabled=false' )
,(u'Real Estate' , u'http://www.haaretz.com/cmlink/real-estate-print-rss-1.264977?localLinksEnabled=false' )
,(u'Features' , u'http://www.haaretz.com/cmlink/features-print-rss-1.264912?localLinksEnabled=false' )
,(u'Arts and leisure' , u'http://www.haaretz.com/cmlink/arts-and-leisure-rss-1.286090?localLinksEnabled=false' )
,(u'Books' , u'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false' )
,(u'Food and Wine' , u'http://www.haaretz.com/cmlink/food-and-wine-print-rss-1.265034?localLinksEnabled=false' )
,(u'Sports' , u'http://www.haaretz.com/cmlink/sports-rss-1.286092?localLinksEnabled=false' )
(u'News' , PREFIX + u'/print-edition/news' )
,(u'Opinion' , PREFIX + u'/print-edition/opinion' )
,(u'Business' , PREFIX + u'/print-edition/business' )
,(u'Real estate' , PREFIX + u'/print-edition/real-estate' )
,(u'Sports' , PREFIX + u'/print-edition/sports' )
,(u'Travel' , PREFIX + u'/print-edition/travel' )
,(u'Books' , PREFIX + u'/print-edition/books' )
,(u'Food & Wine' , PREFIX + u'/print-edition/food-wine' )
,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure' )
,(u'Features' , PREFIX + u'/print-edition/features' )
]
def print_version(self, url):
article = url.rpartition('/')[2]
return 'http://www.haaretz.com/misc/article-print-page/' + article
def parse_index(self):
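# Scrape each print-edition section page listed in 'feeds' and build the
# article lists from its 'text' blocks.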
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll(attrs={'class':'text'}):
sp = item.find('span',attrs={'class':'h3 font-weight-normal'})
desc = item.find('p')
description = ''
if sp:
if desc:
description = self.tag_to_string(desc)
link = sp.a
url = self.PREFIX + link['href']
title = self.tag_to_string(link)
times = strftime('%a, %d %b %Y %H:%M:%S +0000',gmtime())
articles.append({
'title' :title
,'date' :times
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup


@@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
import re
import time
from calibre.web.feeds.news import BasicNewsRecipe
class TheHindu(BasicNewsRecipe):
@@ -10,45 +10,41 @@ class TheHindu(BasicNewsRecipe):
language = 'en_IN'
oldest_article = 7
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal'
max_articles_per_feed = 100
no_stylesheets = True
remove_tags_before = {'name':'font', 'class':'storyhead'}
preprocess_regexps = [
(re.compile(r'<!-- story ends -->.*', re.DOTALL),
lambda match: '</body></html>'),
]
extra_css = '''
.storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
'''
feeds = [
(u'Main - Front Page', u'http://www.hindu.com/rss/01hdline.xml'),
(u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
(u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
(u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
(u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
(u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
(u'Main - Weather / Religion / Crossword / Cartoon',
u'http://www.hindu.com/rss/10hdline.xml'),
(u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
(u'Supplement - Literary Review',
u'http://www.hindu.com/rss/lrhdline.xml'),
(u'Supplement - Sunday Magazine',
u'http://www.hindu.com/rss/maghdline.xml'),
(u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
(u'Supplement - Business Review',
u'http://www.hindu.com/rss/bizhdline.xml'),
(u'Supplement - Book Review',
u'http://www.hindu.com/rss/brhdline.xml'),
(u'Supplement - Science & Technology',
u'http://www.hindu.com/rss/setahdline.xml')
]
keep_only_tags = [dict(id='content')]
remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
dict(id=['email-section', 'right-column', 'printfooter'])]
extra_css = '.photo-caption { font-size: smaller }'
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td','center']):
t.name = 'div'
return soup
def parse_index(self):
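# Fetch today's 'todays-paper' index page and group its article links under
# the section headings (h3) found there.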
today = time.strftime('%Y-%m-%d')
soup = self.index_to_soup(
'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
div = soup.find(id='left-column')
feeds = []
current_section = None
current_articles = []
for x in div.findAll(['h3', 'div']):
if current_section and x.get('class', '') == 'tpaper':
a = x.find('a', href=True)
if a is not None:
current_articles.append({'url':a['href']+'?css=print',
'title':self.tag_to_string(a), 'date': '',
'description':''})
if x.name == 'h3':
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
return feeds


@@ -1,12 +1,15 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import string, pprint
from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe):
title = u'The Houston Chronicle'
description = 'News from Houston, Texas'
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal'
language = 'en'
timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True
@@ -38,54 +41,23 @@ class HoustonChronicle(BasicNewsRecipe):
def parse_index(self):
soup = self.index_to_soup('http://www.chron.com/news/')
container = soup.find('table', attrs={'class':'body-columns'})
categories = ['news', 'sports', 'business', 'entertainment', 'life',
'travel']
feeds = []
current_section = 'Top Stories'
current_articles = []
self.log('\tFound section:', current_section)
for div in container.findAll('div'):
if div.get('class', None) == 'module-mast':
t = self.tag_to_string(div).replace(u'\xbb', '').strip()
if t and 'interactives' not in t:
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = t
current_articles = []
self.log('\tFound section:', current_section)
elif div.get('storyid', False):
a = div.find('a', href=True)
if a:
title = self.tag_to_string(a)
url = a.get('href')
if title and url:
if url.startswith('/'):
for cat in categories:
articles = []
soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
for elem in soup.findAll(comptype='story', storyid=True):
a = elem.find('a', href=True)
if a is None: continue
url = a['href']
if not url.startswith('http://'):
url = 'http://www.chron.com'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
current_articles.append({'title':title, 'url':url,
'date':'', 'description':''})
elif div.get('class', None) == 'columnbox' and \
'special' in current_section.lower():
a = div.find('a')
if a:
title = self.tag_to_string(a)
url = a.get('href')
if title and url:
if not url.startswith('/'): continue
url = 'http://www.chron.com'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
a.extract()
desc = self.tag_to_string(div)
current_articles.append({'title':title, 'url':url,
'date':'', 'description':desc})
if current_section and current_articles:
feeds.append((current_section, current_articles))
articles.append({'title':self.tag_to_string(a), 'url':url,
'description':'', 'date':''})
pprint.pprint(articles[-1])
if articles:
feeds.append((string.capwords(cat), articles))
return feeds

View File

@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277305250(BasicNewsRecipe):
title = u'infzm - China Southern Weekly'
oldest_article = 14
max_articles_per_feed = 100
feeds = [(u'\u5357\u65b9\u5468\u672b-\u70ed\u70b9\u65b0\u95fb', u'http://www.infzm.com/rss/home/rss2.0.xml'),
(u'\u5357\u65b9\u5468\u672b-\u7ecf\u6d4e\u65b0\u95fb', u'http://www.infzm.com/rss/economic.xml'),
(u'\u5357\u65b9\u5468\u672b-\u6587\u5316\u65b0\u95fb', u'http://www.infzm.com/rss/culture.xml'),
(u'\u5357\u65b9\u5468\u672b-\u751f\u6d3b\u65f6\u5c1a', u'http://www.infzm.com/rss/lifestyle.xml'),
(u'\u5357\u65b9\u5468\u672b-\u89c2\u70b9', u'http://www.infzm.com/rss/opinion.xml')
]
__author__ = 'rty'
__version__ = '1.0'
language = 'zh'
publisher = 'http://www.infzm.com'
description = 'Chinese Weekly Tabloid'
category = 'News, China'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
#encoding = 'GB2312'
encoding = 'UTF-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://i50.tinypic.com/2qmfb7l.jpg'
extra_css = '''
@font-face { font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
body {
margin-right: 8pt;
font-family: 'DroidFont', serif;}
.detailContent {font-family: 'DroidFont', serif, sans-serif}
'''
keep_only_tags = [
dict(name='div', attrs={'id':'detailContent'}),
]
remove_tags = [
dict(name='div', attrs={'id':['detailTools', 'detailSideL', 'pageNum']}),
]
remove_tags_after = [
dict(name='div', attrs={'id':'pageNum'}),
]
def preprocess_html(self, soup):
for item in soup.findAll(color=True):
del item['color']
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lrb.co.uk
'''
@ -8,17 +8,20 @@ lrb.co.uk
from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooks(BasicNewsRecipe):
title = u'London Review of Books'
__author__ = u'Darko Miletic'
description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, England'
publisher = 'London Review of Books'
oldest_article = 7
title = 'London Review of Books (free)'
__author__ = 'Darko Miletic'
description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, UK'
publisher = 'LRB ltd.'
oldest_article = 15
max_articles_per_feed = 100
language = 'en_GB'
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
publication_type = 'magazine'
masthead_url = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
conversion_options = {
'comments' : description
@ -27,13 +30,16 @@ class LondonReviewOfBooks(BasicNewsRecipe):
,'publisher' : publisher
}
keep_only_tags = [dict(name='div' , attrs={'id' :'main'})]
remove_tags = [
dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
,dict(name='div' , attrs={'id' :['mainmenu','precontent','otherarticles'] })
,dict(name='span', attrs={'class':['inlineright','article-icons']})
,dict(name='ul' , attrs={'class':'article-controls'})
,dict(name='p' , attrs={'class':'meta-info' })
]
keep_only_tags = [dict(attrs={'class':['article-body indent','letters','article-list']})]
remove_attributes = ['width','height']
feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup('http://www.lrb.co.uk/')
cover_item = soup.find('p',attrs={'class':'cover'})
if cover_item:
cover_url = 'http://www.lrb.co.uk' + cover_item.a.img['src']
return cover_url

View File

@ -0,0 +1,75 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lrb.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooksPayed(BasicNewsRecipe):
title = 'London Review of Books'
__author__ = 'Darko Miletic'
description = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, UK'
publisher = 'LRB Ltd.'
max_articles_per_feed = 100
language = 'en_GB'
no_stylesheets = True
delay = 1
use_embedded_content = False
encoding = 'utf-8'
INDEX = 'http://www.lrb.co.uk'
LOGIN = INDEX + '/login'
masthead_url = INDEX + '/assets/images/lrb_logo_big.gif'
needs_subscription = True
publication_type = 'magazine'
extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
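# nr=1 selects the second form on the page (mechanize indexes forms from 0)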
br.select_form(nr=1)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def parse_index(self):
articles = []
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('p',attrs={'class':'cover'})
lrbtitle = self.title
if cover_item:
self.cover_url = self.INDEX + cover_item.a.img['src']
content = self.INDEX + cover_item.a['href']
soup2 = self.index_to_soup(content)
sitem = soup2.find(attrs={'class':'article-list'})
lrbtitle = soup2.head.title.string
for item in sitem.findAll('a',attrs={'class':'title'}):
description = u''
title_prefix = u''
feed_link = item
if feed_link.has_key('href'):
url = self.INDEX + feed_link['href']
title = title_prefix + self.tag_to_string(feed_link)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
return [(lrbtitle, articles)]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})]
remove_attributes = ['width','height']

View File

@ -7,18 +7,18 @@ class NYTimes(BasicNewsRecipe):
__author__ = 'Krittika Goyal'
description = 'Canadian national newspaper'
timefmt = ' [%d %b, %Y]'
needs_subscription = False
language = 'en_CA'
needs_subscription = False
no_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'})
#remove_tags_after = dict(name='td', attrs={'class':'newptool1'})
remove_tags_after = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':'story-tools'}),
dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
#dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
#dict(name='form', attrs={'onsubmit':''}),
#dict(name='table', attrs={'cellspacing':'0'}),
dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
]
# def preprocess_html(self, soup):
@ -37,7 +37,7 @@ class NYTimes(BasicNewsRecipe):
def parse_index(self):
soup = self.nejm_get_index()
div = soup.find(id='LegoText4')
div = soup.find(id='npContentMain')
current_section = None
current_articles = []
@ -50,7 +50,7 @@ class NYTimes(BasicNewsRecipe):
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if current_section is not None and x.name == 'h3':
if current_section is not None and x.name == 'h5':
# Article found
title = self.tag_to_string(x)
a = x.find('a', href=lambda x: x and 'story' in x)
@ -59,7 +59,7 @@ class NYTimes(BasicNewsRecipe):
url = a.get('href', False)
if not url or not title:
continue
if url.startswith('story'):
#if url.startswith('story'):
url = 'http://www.nationalpost.com/todays-paper/'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
@ -70,28 +70,11 @@ class NYTimes(BasicNewsRecipe):
feeds.append((current_section, current_articles))
return feeds
def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'class':'triline'})
page2_link = soup.find('p','pagenav')
if page2_link:
atag = page2_link.find('a',href=True)
if atag:
page2_url = atag['href']
if page2_url.startswith('story'):
page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
elif page2_url.startswith( '/todays-paper/story.html'):
page2_url = 'http://www.nationalpost.com/'+page2_url
page2_soup = self.index_to_soup(page2_url)
if page2_soup:
page2_content = page2_soup.find('div','story-content')
if page2_content:
full_story = BeautifulSoup('<div></div>')
full_story.insert(0,story)
full_story.insert(1,page2_content)
story = full_story
story = soup.find(name='div', attrs={'id':'npContentMain'})
##td = heading.findParent(name='td')
##td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
return soup

View File

@ -32,15 +32,16 @@ class NewScientist(BasicNewsRecipe):
}
preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]
keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})]
keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
remove_tags = [
dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools']})
,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools','comments','blgsocial']})
,dict(name='p' , attrs={'class':['marker','infotext' ]})
,dict(name='meta' , attrs={'name' :'description' })
,dict(name='a' , attrs={'rel' :'tag' })
]
remove_tags_after = dict(attrs={'class':'nbpcopy'})
remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
remove_attributes = ['height','width']
feeds = [

View File

@ -17,7 +17,7 @@ class NYTimes(BasicNewsRecipe):
title = 'New York Times Top Stories'
__author__ = 'GRiker'
language = 'en'
requires_version = (0, 7, 3)
requires_version = (0, 7, 5)
description = 'Top Stories from the New York Times'
# List of sections typically included in Top Stories. Use a keyword from the
@ -79,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule',
'dottedLine',
'entry-meta',
'entry-response module',
'icon enlargeThis',
'leftNavTabs',
'module box nav',
@ -88,6 +89,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
'subNavigation clearfix',
'subNavigation tabContent active',
'subNavigation tabContent active clearfix',
]}),
@ -110,6 +112,7 @@ class NYTimes(BasicNewsRecipe):
'navigation',
'portfolioInline',
'relatedArticles',
'respond',
'side_search',
'side_index',
'side_tool',
@ -458,8 +461,10 @@ class NYTimes(BasicNewsRecipe):
if mp_off >= 0:
c = c[:mp_off]
emTag.insert(0, c)
hrTag = Tag(soup, 'hr')
#hrTag['style'] = "margin-top:0em;margin-bottom:0em"
#hrTag = Tag(soup, 'hr')
#hrTag['class'] = 'caption_divider'
hrTag = Tag(soup, 'div')
hrTag['class'] = 'divider'
emTag.insert(1, hrTag)
caption.replaceWith(emTag)

View File

@ -13,14 +13,14 @@ Story
import re, string, time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag
class NYTimes(BasicNewsRecipe):
title = 'The New York Times'
__author__ = 'GRiker'
language = 'en'
requires_version = (0, 7, 3)
requires_version = (0, 7, 5)
description = 'Daily news from the New York Times (subscription version)'
allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
@ -66,6 +66,7 @@ class NYTimes(BasicNewsRecipe):
'doubleRule',
'dottedLine',
'entry-meta',
'entry-response module',
'icon enlargeThis',
'leftNavTabs',
'module box nav',
@ -75,6 +76,7 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
'subNavigation clearfix',
'subNavigation tabContent active',
'subNavigation tabContent active clearfix',
]}),
@ -97,6 +99,7 @@ class NYTimes(BasicNewsRecipe):
'navigation',
'portfolioInline',
'relatedArticles',
'respond',
'side_search',
'side_index',
'side_tool',
@ -333,7 +336,7 @@ class NYTimes(BasicNewsRecipe):
self.log(">>> No class:'columnGroup first' found <<<")
# Change class="kicker" to <h3>
kicker = soup.find(True, {'class':'kicker'})
if kicker and kicker.contents[0]:
if kicker and kicker.contents and kicker.contents[0]:
h3Tag = Tag(soup, "h3")
h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
use_alt=False)))
@ -348,8 +351,10 @@ class NYTimes(BasicNewsRecipe):
if mp_off >= 0:
c = c[:mp_off]
emTag.insert(0, c)
hrTag = Tag(soup, 'hr')
#hrTag['style'] = "margin-top:0em;margin-bottom:0em"
#hrTag = Tag(soup, 'hr')
#hrTag['class'] = 'caption_divider'
hrTag = Tag(soup, 'div')
hrTag['class'] = 'divider'
emTag.insert(1, hrTag)
caption.replaceWith(emTag)
@ -417,12 +422,11 @@ class NYTimes(BasicNewsRecipe):
return soup
def postprocess_book(self, oeb, opts, log) :
print "\npostprocess_book()\n"
def extract_byline(href) :
# <meta name="byline" content=
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
def populate_article_metadata(self,article,soup,first):
'''
Extract author and description from article, add to article metadata
'''
def extract_author(soup):
byline = soup.find('meta',attrs={'name':['byl','CLMST']})
if byline :
author = byline['content']
@ -432,50 +436,34 @@ class NYTimes(BasicNewsRecipe):
if byline:
author = byline.renderContents()
else:
print "couldn't find byline in %s" % href
print soup.prettify()
return None
# Kill commas - Kindle switches to '&'
return re.sub(',','',author)
return author
def extract_description(href) :
soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
def extract_description(soup):
description = soup.find('meta',attrs={'name':['description','description ']})
if description :
# print repr(description['content'])
# print self.massageNCXText(description['content'])
return self.massageNCXText(description['content'])
else:
# Take first paragraph of article
articleBody = soup.find('div',attrs={'id':'articleBody'})
if not articleBody:
articlebody = soup.find('div',attrs={'id':'articlebody'})
if not articlebody:
# Try again with class instead of id
articleBody = soup.find('div',attrs={'class':'articleBody'})
if not articleBody:
print 'postprocess_book.extract_description(): Did not find <div id="articleBody">:'
articlebody = soup.find('div',attrs={'class':'articlebody'})
if not articlebody:
print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
print soup.prettify()
return None
paras = articleBody.findAll('p')
paras = articlebody.findAll('p')
for p in paras:
if p.renderContents() > '' :
return self.massageNCXText(self.tag_to_string(p,use_alt=False))
return None
# Method entry point here
# Single section toc looks different than multi-section tocs
if oeb.toc.depth() == 2 :
for article in oeb.toc :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href).decode('utf-8')
elif oeb.toc.depth() == 3 :
for section in oeb.toc :
for article in section :
if article.author is None :
article.author = extract_byline(article.href)
if article.description is None :
article.description = extract_description(article.href)
if not article.author:
article.author = extract_author(soup)
if not article.summary:
article.summary = article.text_summary = extract_description(soup)
def strip_anchors(self,soup):
paras = soup.findAll(True)

View File

@ -28,7 +28,7 @@ class OldNewThing(BasicNewsRecipe):
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':['postsub','comment']})]
keep_only_tags = [dict(attrs={'class':'full-post'})]
remove_tags = [dict(attrs={'class':['post-attributes','post-tags','post-actions']})]
feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')]

View File

@ -0,0 +1,79 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1278063072(BasicNewsRecipe):
title = u'Singtao Daily - Canada'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'rty'
description = 'Toronto Canada Chinese Newspaper'
publisher = 'news.singtao.ca'
category = 'Chinese, News, Canada'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://news.singtao.ca/i/site_2009/logo.jpg'
extra_css = '''
@font-face {font-family: "DroidFont", serif, sans-serif; src: url(res:///system/fonts/DroidSansFallback.ttf); }\
body {text-align: justify; margin-right: 8pt; font-family: 'DroidFont', serif;}\
h1 {font-family: 'DroidFont', serif;}\
.articledescription {font-family: 'DroidFont', serif;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':['title','storybody']}),
dict(name='div', attrs={'class':'content'})
]
def parse_index(self):
feeds = []
for title, url in [
('Editorial',
'http://news.singtao.ca/toronto/editorial.html'),
('Toronto \xe5\x9f\x8e\xe5\xb8\x82/\xe7\xa4\xbe\xe5\x8d\x80'.decode('utf-8'),
'http://news.singtao.ca/toronto/city.html'),
('Canada \xe5\x8a\xa0\xe5\x9c\x8b'.decode('utf-8'),
'http://news.singtao.ca/toronto/canada.html'),
('Entertainment',
'http://news.singtao.ca/toronto/entertainment.html'),
('World',
'http://news.singtao.ca/toronto/world.html'),
('Finance \xe5\x9c\x8b\xe9\x9a\x9b\xe8\xb2\xa1\xe7\xb6\x93'.decode('utf-8'),
'http://news.singtao.ca/toronto/finance.html'),
('Sports', 'http://news.singtao.ca/toronto/sports.html'),
]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
return feeds
def parse_section(self, url):
soup = self.index_to_soup(url)
div = soup.find(attrs={'class': ['newslist paddingL10T10','newslist3 paddingL10T10']})
#date = div.find(attrs={'class': 'underlineBLK'})
current_articles = []
for li in div.findAll('li'):
a = li.find('a', href = True)
if a is None:
continue
title = self.tag_to_string(a)
url = a.get('href', False)
if not url or not title:
continue
if url.startswith('/'):
url = 'http://news.singtao.ca'+url
# self.log('\t\tFound article:', title)
# self.log('\t\t\t', url)
current_articles.append({'title': title, 'url': url, 'description':''})
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278049615(BasicNewsRecipe):
title = u'Statesman'
publisher = 'http://www.statesman.com/'
description = 'Austin Texas Daily Newspaper'
category = 'News, Austin, Texas'
__author__ = 'rty'
oldest_article = 3
max_articles_per_feed = 100
feeds = [(u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
(u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
(u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
(u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
(u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
]
masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
#temp_files = []
#articles_are_obfuscated = True
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
remove_tags = [
dict(name='div', attrs={'id':'cxArticleOptions'}),
]
keep_only_tags = [
dict(name='div', attrs={'class':'cxArticleHeader'}),
dict(name='div', attrs={'id':'cxArticleBodyText'}),
]

View File

@ -1,21 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class TimesOfIndia(BasicNewsRecipe):
title = u'Times of India'
language = 'en_IN'
__author__ = 'Krittika Goyal'
__author__ = 'Kovid Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
remove_stylesheets = True
no_stylesheets = True
keep_only_tags = [dict(attrs={'class':'prttabl'})]
remove_tags = [
dict(name='iframe'),
dict(name='td', attrs={'class':'newptool1'}),
dict(name='div', attrs={'id':'newptool'}),
dict(name='ul', attrs={'class':'newtabcontent_tabs_new'}),
dict(name='b', text='Topics'),
dict(name='span', text=':'),
dict(style=lambda x: x and 'float' in x)
]
feeds = [
@ -42,13 +37,8 @@ class TimesOfIndia(BasicNewsRecipe):
('Most Read',
'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
]
def print_version(self, url):
return url + '?prtpage=1'
def preprocess_html(self, soup):
heading = soup.find(name='h1', attrs={'class':'heading'})
td = heading.findParent(name='td')
td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, td)
td.name = 'div'
return soup

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277647803(BasicNewsRecipe):
title = u'Winnipeg Sun'
__author__ = 'rty'
__version__ = '1.0'
oldest_article = 2
publisher = 'www.winnipegsun.com'
description = 'Winnipeg Newspaper'
category = 'News, Winnipeg, Canada'
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'UTF-8'
remove_javascript = True
use_embedded_content = False
language = 'en_CA'
feeds = [
(u'News', u'http://www.winnipegsun.com/news/rss.xml'),
(u'Columnists', u'http://www.winnipegsun.com/columnists/rss.xml'),
(u'Editorial', u'http://www.winnipegsun.com/comment/editorial/rss.xml'),
(u'Entertainments', u'http://www.winnipegsun.com/entertainment/rss.xml'),
(u'Life', u'http://www.winnipegsun.com/life/rss.xml'),
(u'Money', u'http://www.winnipegsun.com/money/rss.xml')
]
keep_only_tags = [
dict(name='div', attrs={'id':'article'}),
]
remove_tags = [
dict(name='div', attrs={'class':['leftBox','bottomBox clear']}),
dict(name='ul', attrs={'class':'tabs dl contentSwap'}),
dict(name='div', attrs={'id':'commentsBottom'}),
]
remove_tags_after = [
dict(name='div', attrs={'class':'bottomBox clear'})
]

View File

@ -15,22 +15,22 @@ class ZAOBAO(BasicNewsRecipe):
no_stylesheets = True
recursions = 1
language = 'zh'
encoding = 'gbk'
# multithreaded_fetch = True
keep_only_tags = [
dict(name='table', attrs={'cellpadding':'9'}),
dict(name='table', attrs={'class':'cont'}),
dict(name='div', attrs={'id':'content'}),
dict(name='td', attrs={'class':'text'}),
dict(name='span', attrs={'class':'page'}),
dict(name='div', attrs={'id':'content'})
]
remove_tags = [
dict(name='table', attrs={'cellspacing':'9'}),
dict(name='fieldset'),
dict(name='div', attrs={'width':'30%'}),
]
extra_css = '\
extra_css = '\n\
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n\
body{font-family: serif1, serif}\n\
.article_description{font-family: serif1, serif}\n\
@ -41,7 +41,10 @@ class ZAOBAO(BasicNewsRecipe):
.article {font-size:medium}\n\
.navbar {font-size: small}\n\
.feed{font-size: medium}\n\
.small{font-size: small; padding-right: 8%}\n'
.small{font-size: small;padding-right: 8pt}\n\
.text{padding-right: 8pt}\n\
p{text-indent: 0cm}\n\
div#content{padding-right: 10pt}'
INDEXES = [
(u'\u65b0\u95fb\u56fe\u7247', u'http://www.zaobao.com/photoweb/photoweb_idx.shtml')
@ -65,13 +68,21 @@ class ZAOBAO(BasicNewsRecipe):
(u'\u65e9\u62a5\u526f\u520a', u'http://www.zaobao.com/fk/fk.xml'),
]
def preprocess_html(self, soup):
for tag in soup.findAll(name='a'):
if tag.has_key('href'):
tag_url = tag['href']
if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
del tag['href']
return soup
def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div'
return soup
def parse_feeds(self):
self.log.debug('ZAOBAO overrode parse_feeds()')
self.log_debug(_('ZAOBAO overrode parse_feeds()'))
parsed_feeds = BasicNewsRecipe.parse_feeds(self)
for id, obj in enumerate(self.INDEXES):
@ -88,7 +99,7 @@ class ZAOBAO(BasicNewsRecipe):
a_title = self.tag_to_string(a)
date = ''
description = ''
self.log.debug('adding %s at %s'%(a_title,a_url))
self.log_debug(_('adding %s at %s')%(a_title,a_url))
articles.append({
'title':a_title,
'date':date,
@ -97,26 +108,25 @@ class ZAOBAO(BasicNewsRecipe):
})
pfeeds = feeds_from_index([(title, articles)], oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed,
log=self.log)
max_articles_per_feed=self.max_articles_per_feed)
self.log.debug('adding %s to feed'%(title))
self.log_debug(_('adding %s to feed')%(title))
for feed in pfeeds:
self.log.debug('adding feed: %s'%(feed.title))
self.log_debug(_('adding feed: %s')%(feed.title))
feed.description = self.DESC_SENSE
parsed_feeds.append(feed)
for a, article in enumerate(feed):
self.log.debug('added article %s from %s'%(article.title, article.url))
self.log.debug('added feed %s'%(feed.title))
self.log_debug(_('added article %s from %s')%(article.title, article.url))
self.log_debug(_('added feed %s')%(feed.title))
for i, feed in enumerate(parsed_feeds):
# workaround a strange problem: sometimes the xml encoding is not applied correctly by parse()
weired_encoding_detected = False
if not isinstance(feed.description, unicode) and self.encoding and feed.description:
self.log.debug('Feed %s is not encoded correctly, manually replace it'%(feed.title))
self.log_debug(_('Feed %s is not encoded correctly, manually replace it')%(feed.title))
feed.description = feed.description.decode(self.encoding, 'replace')
elif feed.description.find(self.DESC_SENSE) == -1 and self.encoding and feed.description:
self.log.debug('Feed %s is strangely encoded, manually redo all'%(feed.title))
self.log_debug(_('Feed %s is weirdly encoded, manually redo all')%(feed.title))
feed.description = feed.description.encode('cp1252', 'replace').decode(self.encoding, 'replace')
weired_encoding_detected = True
@ -138,7 +148,7 @@ class ZAOBAO(BasicNewsRecipe):
article.text_summary = article.text_summary.encode('cp1252', 'replace').decode(self.encoding, 'replace')
if article.title == "Untitled article":
self.log.debug('Removing empty article %s from %s'%(article.title, article.url))
self.log_debug(_('Removing empty article %s from %s')%(article.title, article.url))
# remove the article
feed.articles[a:a+1] = []
return parsed_feeds

View File

@ -406,3 +406,8 @@ img, object, svg|svg {
width: auto;
height: auto;
}
/* These are needed because ADE renders anchors the same as links */
a { text-decoration: inherit; color: inherit; cursor: inherit }
a[href] { text-decoration: underline; color: blue; cursor: pointer }

View File

@ -40,19 +40,20 @@ class LinuxFreeze(Command):
'/usr/bin/pdftohtml',
'/usr/lib/libwmflite-0.2.so.7',
'/usr/lib/liblcms.so.1',
'/usr/lib/liblcms2.so.2',
'/usr/lib/libstlport.so.5.1',
'/tmp/calibre-mount-helper',
'/usr/lib/libunrar.so',
'/usr/lib/libchm.so.0',
'/usr/lib/libsqlite3.so.0',
'/usr/lib/libsqlite3.so.0',
'/usr/lib/libmng.so.1',
'/usr/lib/libpodofo.so.0.6.99',
'/usr/lib/libpodofo.so.0.8.1',
'/lib/libz.so.1',
'/lib/libuuid.so.1',
'/usr/lib/libtiff.so.3',
'/usr/lib/libtiff.so.5',
'/lib/libbz2.so.1',
'/usr/lib/libpoppler.so.5',
'/usr/lib/libpoppler-qt4.so.3',
'/usr/lib/libpoppler.so.6',
'/usr/lib/libxml2.so.2',
'/usr/lib/libopenjpeg.so.2',
'/usr/lib/libxslt.so.1',
@ -61,10 +62,10 @@ class LinuxFreeze(Command):
'/usr/lib/libgthread-2.0.so.0',
stdcpp,
ffi,
'/usr/lib/libpng12.so.0',
'/usr/lib/libpng14.so.14',
'/usr/lib/libexslt.so.0',
'/usr/lib/libMagickWand.so.2',
'/usr/lib/libMagickCore.so.2',
'/usr/lib/libMagickWand.so.3',
'/usr/lib/libMagickCore.so.3',
'/usr/lib/libgcrypt.so.11',
'/usr/lib/libgpg-error.so.0',
'/usr/lib/libphonon.so.4',

View File

@ -265,6 +265,9 @@ class Py2App(object):
@flush
def get_local_dependencies(self, path_to_lib):
for x in self.get_dependencies(path_to_lib):
if x.startswith('libpodofo'):
yield x, x
continue
for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/',
'/opt/local/lib/',
'/Library/Frameworks/Python.framework/', SW+'/freetype/lib/'):
@ -397,7 +400,7 @@ class Py2App(object):
@flush
def add_podofo(self):
info('\nAdding PoDoFo')
pdf = join(SW, 'lib', 'libpodofo.0.6.99.dylib')
pdf = join(SW, 'lib', 'libpodofo.0.8.1.dylib')
self.install_dylib(pdf)
@flush

View File

@ -13,7 +13,7 @@ from setup import Command, modules, functions, basenames, __version__, \
from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
QT_DIR = 'C:\\Qt\\4.6.0'
QT_DIR = 'C:\\Qt\\4.6.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'

View File

@ -162,9 +162,50 @@ SET(WANT_LIB64 FALSE)
SET(PODOFO_BUILD_SHARED TRUE)
SET(PODOFO_BUILD_STATIC FALSE)
cp build/podofo-0.7.0/build/src/Release/podofo.dll bin/
cp build/podofo-0.7.0/build/src/Release/podofo.lib lib/
cp build/podofo-0.7.0/build/src/Release/podofo.exp lib/
cp build/podofo/build/src/Release/podofo.dll bin/
cp build/podofo/build/src/Release/podofo.lib lib/
cp build/podofo/build/src/Release/podofo.exp lib/
cp build/podofo/build/podofo_config.h include/podofo/
cp -r build/podofo/src/* include/podofo/
The following patch was required to get it to compile:
Index: src/PdfImage.cpp
===================================================================
--- src/PdfImage.cpp (revision 1261)
+++ src/PdfImage.cpp (working copy)
@@ -627,7 +627,7 @@
long lLen = static_cast<long>(pInfo->rowbytes * height);
char* pBuffer = static_cast<char*>(malloc(sizeof(char) * lLen));
- png_bytep pRows[height];
+ png_bytepp pRows = static_cast<png_bytepp>(malloc(sizeof(png_bytep)*height));
for(int y=0; y<height; y++)
{
pRows[y] = reinterpret_cast<png_bytep>(pBuffer + (y * pInfo->rowbytes));
@@ -672,6 +672,7 @@
this->SetImageData( width, height, pInfo->bit_depth, &stream );
free(pBuffer);
+ free(pRows);
}
#endif // PODOFO_HAVE_PNG_LIB
Index: src/PdfFiltersPrivate.cpp
===================================================================
--- src/PdfFiltersPrivate.cpp (revision 1261)
+++ src/PdfFiltersPrivate.cpp (working copy)
@@ -1019,7 +1019,7 @@
/*
* Prepare for input from a memory buffer.
*/
-GLOBAL(void)
+void
jpeg_memory_src (j_decompress_ptr cinfo, const JOCTET * buffer, size_t bufsize)
{
my_src_ptr src;
ImageMagick
--------------

View File

@ -154,6 +154,10 @@
<CustomAction Id="LaunchApplication" BinaryKey="WixCA"
DllEntry="WixShellExec" Impersonate="yes"/>
<InstallUISequence>
<FileCost Suppress="yes" />
</InstallUISequence>
</Product>
</Wix>

View File

@ -30,6 +30,7 @@ mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('text/fb2+xml', '.fb2')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-sony-bbeb', '.lrx')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
@ -43,6 +44,7 @@ mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
mimetypes.add_type('application/x-cbz', '.cbz')
mimetypes.add_type('application/x-cbr', '.cbr')
mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
mimetypes.add_type('image/wmf', '.wmf')
guess_type = mimetypes.guess_type
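# After the registrations above, e.g. guess_type('book.fb2') would return
# ('text/fb2+xml', None) -- the stdlib's standard (type, encoding) tuple.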
import cssutils
@ -340,13 +342,6 @@ def detect_ncpus():
return ans
def launch(path_or_url):
from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QDesktopServices
if os.path.exists(path_or_url):
path_or_url = 'file:'+path_or_url
QDesktopServices.openUrl(QUrl(path_or_url))
relpath = os.path.relpath
_spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)
def english_sort(x, y):

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.4'
__version__ = '0.7.8'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP'
author = 'Kovid Goyal'
@ -30,6 +31,7 @@ every time you add an HTML file to the library.\
with TemporaryDirectory('_plugin_html2zip') as tdir:
recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
if self.site_customization and self.site_customization.strip():
recs.append(['input_encoding', self.site_customization.strip(),
OptionRecommendation.HIGH])
@ -81,7 +83,9 @@ class PML2PMLZ(FileTypePlugin):
return of.name
# }}}
# Metadata reader plugins {{{
class ComicMetadataReader(MetadataReaderPlugin):
name = 'Read comic metadata'
@ -319,7 +323,9 @@ class ZipMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.zip import get_metadata
return get_metadata(stream)
# }}}
# Metadata writer plugins {{{
class EPUBMetadataWriter(MetadataWriterPlugin):
@ -395,6 +401,7 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.topaz import set_metadata
set_metadata(stream, mi)
# }}}
from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.epub.input import EPUBInput
@ -436,7 +443,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
BOOQ, ELONEX, POCKETBOOK301
BOOQ, ELONEX, POCKETBOOK301, MENTOR
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
@ -444,7 +451,7 @@ from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
from calibre.devices.prs505.driver import PRS505
from calibre.devices.android.driver import ANDROID, S60
from calibre.devices.nokia.driver import N770, N810, E71X
from calibre.devices.nokia.driver import N770, N810, E71X, E52
from calibre.devices.eslick.driver import ESLICK, EBK52
from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
@ -453,7 +460,7 @@ from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS
from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
@ -461,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
LibraryThing
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.library.catalog import CSV_XML, EPUB_MOBI
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI]
LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]
plugins += [
ComicInput,
EPUBInput,
@ -499,7 +509,6 @@ plugins += [
]
# Order here matters. The first matched device is the one used.
plugins += [
ITUNES,
HANLINV3,
HANLINV5,
BLACKBERRY,
@ -520,6 +529,7 @@ plugins += [
S60,
N770,
E71X,
E52,
N810,
COOL_ER,
ESLICK,
@ -550,6 +560,10 @@ plugins += [
AZBOOKA,
FOLDER_DEVICE_FOR_CONFIG,
AVANT,
MENTOR,
SWEEX,
PDNOVEL,
ITUNES,
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]

View File

@ -36,7 +36,7 @@ class Plugin(_Plugin):
self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
# Input profiles {{{
class InputProfile(Plugin):
author = 'Kovid Goyal'
@ -218,6 +218,8 @@ input_profiles = [InputProfile, SonyReaderInput, SonyReader300Input,
input_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
# }}}
class OutputProfile(Plugin):
author = 'Kovid Goyal'
@ -237,11 +239,12 @@ class OutputProfile(Plugin):
# If True the MOBI renderer on the device supports MOBI indexing
supports_mobi_indexing = False
# Device supports displaying a nested TOC
supports_nested_toc = True
# If True output should be optimized for a touchscreen interface
touchscreen = False
touchscreen_news_css = ''
# A list of extra (beyond CSS 2.1) modules supported by the device
# Format is a cssutils profile dictionary (see iPad for example)
extra_css_modules = []
@classmethod
def tags_to_string(cls, tags):
@ -256,8 +259,151 @@ class iPadOutput(OutputProfile):
screen_size = (768, 1024)
comic_screen_size = (768, 1024)
dpi = 132.0
supports_nested_toc = False
extra_css_modules = [
{
'name':'webkit',
'props': { '-webkit-border-bottom-left-radius':'{length}',
'-webkit-border-bottom-right-radius':'{length}',
'-webkit-border-top-left-radius':'{length}',
'-webkit-border-top-right-radius':'{length}',
'-webkit-border-radius': r'{border-width}(\s+{border-width}){0,3}|inherit',
},
'macros': {'border-width': '{length}|medium|thick|thin'}
}
]
touchscreen = True
# touchscreen_news_css {{{
touchscreen_news_css = u'''
/* hr used in articles */
.article_articles_list {
width:18%;
}
.article_link {
color: #593f29;
font-style: italic;
}
.article_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:32%;
}
.article_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:32%;
}
.article_sections_list {
width:18%;
}
.articles_link {
font-weight: bold;
}
.sections_link {
font-weight: bold;
}
.caption_divider {
border:#ccc 1px solid;
}
.touchscreen_navbar {
background:#c3bab2;
border:#ccc 0px solid;
border-collapse:separate;
border-spacing:1px;
margin-left: 5%;
margin-right: 5%;
width: 90%;
-webkit-border-radius:4px;
}
.touchscreen_navbar td {
background:#fff;
font-family:Helvetica;
font-size:80%;
/* UI touchboxes use 8px padding */
padding: 6px;
text-align:center;
}
.touchscreen_navbar td a:link {
color: #593f29;
text-decoration: none;
}
/* Index formatting */
.publish_date {
text-align:center;
}
.divider {
border-bottom:1em solid white;
border-top:1px solid gray;
}
hr.caption_divider {
border-color:black;
border-style:solid;
border-width:1px;
}
/* Feed summary formatting */
.article_summary {
display:inline-block;
}
.feed {
font-family:sans-serif;
font-weight:bold;
font-size:larger;
}
.feed_link {
font-style: italic;
}
.feed_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:40%;
}
.feed_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:40%;
}
.feed_title {
text-align: center;
font-size: 160%;
}
.feed_up {
font-weight: bold;
width:20%;
}
.summary_headline {
font-weight:bold;
text-align:left;
}
.summary_byline {
text-align:left;
font-family:monospace;
}
.summary_text {
text-align:left;
}
'''
# }}}
class SonyReaderOutput(OutputProfile):

View File

@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser, prefs
from calibre.ebooks.epub.fix import ePubFixer
platform = 'linux'
@ -151,13 +152,13 @@ def reread_filetype_plugins():
def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
occasion = {'import':_on_import, 'preprocess':_on_preprocess,
occasion_plugins = {'import':_on_import, 'preprocess':_on_preprocess,
'postprocess':_on_postprocess}[occasion]
customization = config['plugin_customization']
if ft is None:
ft = os.path.splitext(path_to_file)[-1].lower().replace('.', '')
nfp = path_to_file
for plugin in occasion.get(ft, []):
for plugin in occasion_plugins.get(ft, []):
if is_disabled(plugin):
continue
plugin.site_customization = customization.get(plugin.name, '')
@ -194,7 +195,6 @@ def plugin_customization(plugin):
# }}}
# Input/Output profiles {{{
def input_profiles():
for plugin in _initialized_plugins:
@ -444,6 +444,14 @@ def device_plugins(): # {{{
yield plugin
# }}}
# epub fixers {{{
def epub_fixers():
for plugin in _initialized_plugins:
if isinstance(plugin, ePubFixer):
if not is_disabled(plugin):
if platform in plugin.supported_platforms:
yield plugin
# }}}
# Initialize plugins {{{

View File

@ -34,6 +34,12 @@ class ANDROID(USBMS):
# Acer
0x502 : { 0x3203 : [0x0100]},
# Dell
0x413c : { 0xb007 : [0x0100]},
# Eken?
0x040d : { 0x0851 : [0x0001]},
}
EBOOK_DIR_MAIN = ['wordplayer/calibretransfer', 'eBooks/import', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -42,11 +48,12 @@ class ANDROID(USBMS):
EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG']
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD',
'PROD_GT-I9000']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'PROD_GT-I9000_CARD']
'GT-I9000', 'FILE-STOR_GADGET']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD',
'FILE-STOR_GADGET']
OSX_MAIN_MEM = 'HTC Android Phone Media'
@ -63,6 +70,16 @@ class ANDROID(USBMS):
dirs = [x.strip() for x in dirs.split(',')]
self.EBOOK_DIR_MAIN = dirs
def get_main_ebook_dir(self, for_upload=False):
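# When listing books (as opposed to uploading), treat Aldiko's import
# directory 'eBooks/import' as 'eBooks', presumably because Aldiko moves
# imported books out of the import directory into its library.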
dirs = self.EBOOK_DIR_MAIN
if not for_upload:
def aldiko_tweak(x):
return 'eBooks' if x == 'eBooks/import' else x
if isinstance(dirs, basestring):
dirs = [dirs]
dirs = list(map(aldiko_tweak, dirs))
return dirs
class S60(USBMS):
name = 'S60 driver'

File diff suppressed because it is too large

View File

@ -186,6 +186,15 @@ class BOOQ(EB600):
WINDOWS_MAIN_MEM = 'EB600'
WINDOWS_CARD_A_MEM = 'EB600'
class MENTOR(EB600):
name = 'Astak Mentor EB600'
gui_name = 'Mentor'
description = _('Communicate with the Astak Mentor EB600')
FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'pdf', 'txt']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'MENTOR'
class ELONEX(EB600):
name = 'Elonex 600EB'

View File

@ -66,7 +66,7 @@ class FOLDER_DEVICE(USBMS):
detected_device=None):
pass
def disconnect_from_folder(self):
def unmount_device(self):
self._main_prefix = ''
self.is_connected = False

View File

@ -106,9 +106,11 @@ class BOOX(HANLINV3):
description = _('Communicate with the BOOX eBook reader.')
author = 'Jesus Manuel Marinho Valcarce'
supported_platforms = ['windows', 'osx', 'linux']
METADATA_CACHE = '.metadata.calibre'
# Ordered list of supported formats
FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi', 'prc', 'chm']
FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi',
'prc', 'chm', 'doc']
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]

View File

@ -24,7 +24,7 @@ class N516(USBMS):
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]
BCD = [0x323, 0x326]
BCD = [0x323, 0x326, 0x327]
VENDOR_NAME = 'INGENIC'
WINDOWS_MAIN_MEM = '_FILE-STOR_GADGE'

View File

@ -59,7 +59,7 @@ class DevicePlugin(Plugin):
return cls.__name__
return cls.name
# Device detection {{{
def test_bcd_windows(self, device_id, bcd):
if bcd is None or len(bcd) == 0:
return True
@ -152,6 +152,7 @@ class DevicePlugin(Plugin):
return True, dev
return False, None
# }}}
def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None) :
@ -372,14 +373,12 @@ class DevicePlugin(Plugin):
@classmethod
def settings(cls):
'''
Should return an opts object. The opts object should have one attribute
Should return an opts object. The opts object should have at least one attribute
`format_map` which is an ordered list of formats for the device.
'''
raise NotImplementedError()
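A minimal sketch of a conforming settings() inside a driver subclass (names hypothetical; real drivers build the opts object from saved configuration):
    @classmethod
    def settings(cls):
        class Opts(object):
            pass
        opts = Opts()
        # format_map: ordered list of formats, most preferred first
        opts.format_map = ['epub', 'pdf', 'txt']
        return opts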
class BookList(list):
'''
A list of books. Each Book object must have the fields:

View File

@ -213,7 +213,7 @@ class KINDLE_DX(KINDLE2):
PRODUCT_ID = [0x0003]
BCD = [0x0100]
class Bookmark():
class Bookmark(): # {{{
'''
A simple class fetching bookmark data
Kindle-specific
@ -429,6 +429,7 @@ class Bookmark():
entries, = unpack('>I', data[9:13])
current_entry = 0
e_base = 0x0d
self.pdf_page_offset = 0
while current_entry < entries:
'''
location, = unpack('>I', data[e_base+2:e_base+6])
@ -516,3 +517,6 @@ class Bookmark():
else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension
# }}}

View File

@ -0,0 +1,116 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com>'
'''
'''
import os
import re
import time
from calibre.ebooks.metadata import MetaInformation
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring
class Book(MetaInformation):
BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
'title_sort', 'comments', 'category', 'publisher', 'series',
'series_index', 'rating', 'isbn', 'language', 'application_id',
'book_producer', 'lccn', 'lcc', 'ddc', 'rights', 'publication_type',
'uuid',
]
def __init__(self, prefix, lpath, title, authors, mime, date, ContentType, thumbnail_name, other=None):
MetaInformation.__init__(self, '')
self.device_collections = []
self._new_book = False
self.path = os.path.join(prefix, lpath)
if os.sep == '\\':
self.path = self.path.replace('/', '\\')
self.lpath = lpath.replace('\\', '/')
else:
self.lpath = lpath
self.title = title
if not authors:
self.authors = ['']
else:
self.authors = [authors]
self.mime = mime
try:
self.size = os.path.getsize(self.path)
except OSError:
self.size = 0
try:
if ContentType == '6':
self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
else:
self.datetime = time.gmtime(os.path.getctime(self.path))
except:
self.datetime = time.gmtime()
if thumbnail_name is not None:
self.thumbnail = ImageWrapper(thumbnail_name)
self.tags = []
if other:
self.smart_update(other)
def __eq__(self, other):
return self.path == getattr(other, 'path', None)
@dynamic_property
def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
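# e.g. an lpath of 'books/Some_Title_42.epub' yields 42 (illustrative path)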
match = re.search(r'_(\d+)$', self.lpath.rpartition('.')[0])
if match:
return int(match.group(1))
return None
return property(fget=fget, doc=doc)
@dynamic_property
def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
return property(doc=doc, fget=fget)
@dynamic_property
def thumbnail(self):
return None
def smart_update(self, other):
'''
Merge the information in C{other} into self. In case of conflicts, the information
in C{other} takes precedence, unless the information in C{other} is NULL.
'''
MetaInformation.smart_update(self, other)
for attr in self.BOOK_ATTRS:
if hasattr(other, attr):
val = getattr(other, attr, None)
setattr(self, attr, val)
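# Illustratively: if other.title is 'New' it wins over self.title;
# if other.title is None, self.title is kept (per the docstring above).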
def to_json(self):
json = {}
for attr in self.JSON_ATTRS:
val = getattr(self, attr)
if isbytestring(val):
enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
val = val.decode(enc, 'replace')
elif isinstance(val, (list, tuple)):
val = [x.decode(preferred_encoding, 'replace') if
isbytestring(x) else x for x in val]
json[attr] = val
return json
class ImageWrapper(object):
def __init__(self, image_path):
self.image_path = image_path

View File

@ -2,17 +2,26 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
import sqlite3 as sqlite
from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book
from calibre.devices.kobo.books import ImageWrapper
from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS
from calibre import prints
class KOBO(USBMS):
name = 'Kobo Reader Device Interface'
gui_name = 'Kobo Reader'
description = _('Communicate with the Kobo Reader')
author = 'Kovid Goyal'
author = 'Timothy Legge and Kovid Goyal'
version = (1, 0, 4)
supported_platforms = ['windows', 'osx', 'linux']
@ -29,3 +38,320 @@ class KOBO(USBMS):
EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True
def initialize(self):
USBMS.initialize(self)
self.book_class = Book
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
dummy_bl = BookList(None, None, None)
if oncard == 'carda' and not self._card_a_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
elif oncard == 'cardb' and not self._card_b_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
elif oncard and oncard != 'carda' and oncard != 'cardb':
self.report_progress(1.0, _('Getting list of books on device...'))
return dummy_bl
prefix = self._card_a_prefix if oncard == 'carda' else \
self._card_b_prefix if oncard == 'cardb' \
else self._main_prefix
# get the metadata cache
bl = self.booklist_class(oncard, prefix, self.settings)
need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
# make a dict cache of paths so the lookup in the loop below is faster.
bl_cache = {}
for idx,b in enumerate(bl):
bl_cache[b.lpath] = idx
def update_booklist(prefix, path, title, authors, mime, date, ContentType, ImageID):
changed = False
# if path_to_ext(path) in self.FORMATS:
try:
lpath = path.partition(self.normalize_path(prefix))[2]
if lpath.startswith(os.sep):
lpath = lpath[len(os.sep):]
lpath = lpath.replace('\\', '/')
# print "LPATH: " + lpath
path = self.normalize_path(path)
# print "Normalized FileName: " + path
idx = bl_cache.get(lpath, None)
if idx is not None:
if ImageID is not None:
imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
#print "Image name Normalized: " + imagename
if imagename is not None:
bl[idx].thumbnail = ImageWrapper(imagename)
bl_cache[lpath] = None
if ContentType != '6':
if self.update_metadata_item(bl[idx]):
# print 'update_metadata_item returned true'
changed = True
else:
book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
# print 'Update booklist'
if bl.add_book(book, replace_metadata=False):
changed = True
except: # Probably a path encoding error
import traceback
traceback.print_exc()
return changed
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
cursor = connection.cursor()
#query = 'select count(distinct volumeId) from volume_shortcovers'
#cursor.execute(query)
#for row in (cursor):
# numrows = row[0]
#cursor.close()
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID from content where BookID is Null'
cursor.execute (query)
changed = False
for i, row in enumerate(cursor):
# self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
path = self.path_from_contentid(row[3], row[5], oncard)
mime = mime_type_ext(path_to_ext(row[3]))
if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
# print "shortbook: " + path
elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6])
if changed:
need_sync = True
cursor.close()
connection.close()
# Remove books that are no longer in the filesystem. Cache contains
# indices into the booklist if book not in filesystem, None otherwise
# Do the operation in reverse order so indices remain valid
for idx in sorted(bl_cache.itervalues(), reverse=True):
if idx is not None:
need_sync = True
del bl[idx]
#print "count found in cache: %d, count of files in metadata: %d, need_sync: %s" % \
# (len(bl_cache), len(bl), need_sync)
if need_sync: #self.count_found_in_bl != len(bl) or need_sync:
if oncard == 'cardb':
self.sync_booklists((None, None, bl))
elif oncard == 'carda':
self.sync_booklists((None, bl, None))
else:
self.sync_booklists((bl, None, None))
self.report_progress(1.0, _('Getting list of books on device...'))
return bl
def delete_via_sql(self, ContentID, ContentType):
# Delete Order:
# 1) shortcover_page
# 2) volume_shorcover
# 2) content
connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite')
cursor = connection.cursor()
t = (ContentID,)
cursor.execute('select ImageID from content where ContentID = ?', t)
ImageID = None
for row in cursor:
# First get the ImageID to delete the images
ImageID = row[0]
cursor.close()
cursor = connection.cursor()
if ContentType == 6:
# Delete the shortcover_pages first
cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)
#Delete the volume_shortcovers second
cursor.execute('delete from volume_shortcovers where volumeid = ?', t)
# Delete the chapters associated with the book next
t = (ContentID,ContentID,)
cursor.execute('delete from content where BookID = ? or ContentID = ?', t)
connection.commit()
cursor.close()
if ImageID == None:
print "Error condition ImageID was not found"
print "You likely tried to delete a book that the kobo has not yet added to the database"
connection.close()
# If all this succeeds we need to delete the images files via the ImageID
return ImageID
def delete_images(self, ImageID):
if ImageID != None:
path_prefix = '.kobo/images/'
path = self._main_prefix + path_prefix + ImageID
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed',)
for ending in file_endings:
fpath = path + ending
fpath = self.normalize_path(fpath)
if os.path.exists(fpath):
# print 'Image File Exists: ' + fpath
os.unlink(fpath)
def delete_books(self, paths, end_session=True):
for i, path in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
path = self.normalize_path(path)
# print "Delete file normalized path: " + path
extension = os.path.splitext(path)[1]
if extension == '.kobo':
# Kobo books do not have book files. They do have some images though
#print "kobo book"
ContentType = 6
ContentID = self.contentid_from_path(path, ContentType)
elif extension == '.pdf' or extension == '.epub':
# print "ePub or pdf"
ContentType = 16
#print "Path: " + path
ContentID = self.contentid_from_path(path, ContentType)
# print "ContentID: " + ContentID
else: # if extension == '.html' or extension == '.txt':
ContentType = 999 # Yet another hack: to get around Kobo changing how ContentID is stored
ContentID = self.contentid_from_path(path, ContentType)
ImageID = self.delete_via_sql(ContentID, ContentType)
#print " We would now delete the Images for" + ImageID
self.delete_images(ImageID)
if os.path.exists(path):
# Delete the ebook
# print "Delete the ebook: " + path
os.unlink(path)
filepath = os.path.splitext(path)[0]
for ext in self.DELETE_EXTS:
if os.path.exists(filepath + ext):
# print "Filename: " + filename
os.unlink(filepath + ext)
if os.path.exists(path + ext):
# print "Filename: " + filename
os.unlink(path + ext)
if self.SUPPORTS_SUB_DIRS:
try:
# print "removed"
os.removedirs(os.path.dirname(path))
except:
pass
self.report_progress(1.0, _('Removing books from device...'))
def remove_books_from_metadata(self, paths, booklists):
for i, path in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...'))
for bl in booklists:
for book in bl:
#print "Book Path: " + book.path
if path.endswith(book.path):
#print " Remove: " + book.path
bl.remove_book(book)
self.report_progress(1.0, _('Removing books from device metadata listing...'))
def add_books_to_metadata(self, locations, metadata, booklists):
metadata = iter(metadata)
for i, location in enumerate(locations):
self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
info = metadata.next()
blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0
# Extract the correct prefix from the pathname. To do this correctly,
# we must ensure that both the prefix and the path are normalized
# so that the comparison will work. Book's __init__ will fix up
# lpath, so we don't need to worry about that here.
path = self.normalize_path(location[0])
if self._main_prefix:
prefix = self._main_prefix if \
path.startswith(self.normalize_path(self._main_prefix)) else None
if not prefix and self._card_a_prefix:
prefix = self._card_a_prefix if \
path.startswith(self.normalize_path(self._card_a_prefix)) else None
if not prefix and self._card_b_prefix:
prefix = self._card_b_prefix if \
path.startswith(self.normalize_path(self._card_b_prefix)) else None
if prefix is None:
prints('in add_books_to_metadata. Prefix is None!', path,
self._main_prefix)
continue
#print "Add book to metatdata: "
#print "prefix: " + prefix
lpath = path.partition(prefix)[2]
if lpath.startswith('/') or lpath.startswith('\\'):
lpath = lpath[1:]
#print "path: " + lpath
#book = self.book_class(prefix, lpath, other=info)
lpath = self.normalize_path(prefix + lpath)
book = Book(prefix, lpath, '', '', '', '', '', '', other=info)
if book.size is None:
book.size = os.stat(self.normalize_path(path)).st_size
booklists[blist].add_book(book, replace_metadata=True)
self.report_progress(1.0, _('Adding books to device metadata listing...'))
def contentid_from_path(self, path, ContentType):
if ContentType == 6:
ContentID = os.path.splitext(path)[0]
# Remove the prefix on the file. It could be either the main or the card A prefix
ContentID = ContentID.replace(self._main_prefix, '')
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, '')
elif ContentType == 999: # HTML Files
ContentID = path
ContentID = ContentID.replace(self._main_prefix, "/mnt/onboard/")
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, "/mnt/sd/")
else: # ContentType == 16
ContentID = path
ContentID = ContentID.replace(self._main_prefix, "file:///mnt/onboard/")
if self._card_a_prefix is not None:
ContentID = ContentID.replace(self._card_a_prefix, "file:///mnt/sd/")
ContentID = ContentID.replace("\\", '/')
return ContentID
def path_from_contentid(self, ContentID, ContentType, oncard):
path = ContentID
if oncard == 'cardb':
print 'path from_contentid cardb'
elif oncard == 'carda':
path = path.replace("file:///mnt/sd/", self._card_a_prefix)
# print "SD Card: " + filename
else:
if ContentType == "6":
# This is a hack as the kobo files do not exist
# but the path is required to make a unique id
# for calibre's reference
path = self._main_prefix + path + '.kobo'
# print "Path: " + path
else:
# if path.startswith("file:///mnt/onboard/"):
path = path.replace("file:///mnt/onboard/", self._main_prefix)
path = path.replace("/mnt/onboard/", self._main_prefix)
# print "Internal: " + filename
return path
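
The two methods above are inverses of each other; a worked example of the mapping per ContentType, using a hypothetical Windows mount point 'K:\' as the main prefix:

# ContentType 16 (epub/pdf): real files, stored as file:// URLs in the db
#   path 'K:\books\novel.epub'  <->  ContentID 'file:///mnt/onboard/books/novel.epub'
# ContentType 999 (txt/html): real files, stored as bare device paths
#   path 'K:\notes\todo.txt'    <->  ContentID '/mnt/onboard/notes/todo.txt'
# ContentType 6 (Kobo store books): no file exists; '.kobo' is a fake extension
#   path 'K:\abc123.kobo'       <->  ContentID 'abc123'
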

View File

@@ -49,3 +49,41 @@ class AVANT(USBMS):
EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True
class SWEEX(USBMS):
name = 'Sweex Device Interface'
gui_name = 'Sweex'
description = _('Communicate with the Sweex MM300')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
# Ordered list of supported formats
FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']
VENDOR_ID = [0x0525]
PRODUCT_ID = [0xa4a5]
BCD = [0x0319]
VENDOR_NAME = 'SWEEX'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'
EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True
class PDNOVEL(USBMS):
name = 'Pandigital Novel device interface'
gui_name = 'PD Novel'
description = _('Communicate with the Pandigital Novel')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'linux', 'osx']
FORMATS = ['epub', 'pdf']
VENDOR_ID = [0x18d1]
PRODUCT_ID = [0xb004]
BCD = [0x224]
VENDOR_NAME = 'ANDROID'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '__UMS_COMPOSITE'
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = False

View File

@@ -59,6 +59,27 @@ class E71X(USBMS):
BCD = [0x100]
FORMATS = ['mobi', 'prc']
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = True
VENDOR_NAME = 'NOKIA'
WINDOWS_MAIN_MEM = 'S60'
class E52(USBMS):
name = 'Nokia E52 device interface'
gui_name = 'Nokia E52'
description = _('Communicate with the Nokia E52')
author = 'David Ignjic'
supported_platforms = ['windows', 'linux', 'osx']
VENDOR_ID = [0x421]
PRODUCT_ID = [0x1CD]
BCD = [0x100]
FORMATS = ['mobi', 'prc']
EBOOK_DIR_MAIN = 'eBooks'

View File

@@ -99,7 +99,7 @@ class PRS505(USBMS):
if self._card_b_prefix is not None:
if not write_cache(self._card_b_prefix):
self._card_b_prefix = None
self.booklist_class.rebuild_collections = self.rebuild_collections
def get_device_information(self, end_session=True):
return (self.gui_name, '', '', '')
@@ -145,7 +145,7 @@ class PRS505(USBMS):
blists[i] = booklists[i]
opts = self.settings()
if opts.extra_customization:
collections = [x.strip() for x in
collections = [x.lower().strip() for x in
opts.extra_customization.split(',')]
else:
collections = []
@@ -156,4 +156,10 @@ class PRS505(USBMS):
USBMS.sync_booklists(self, booklists, end_session=end_session)
debug_print('PRS505: finished sync_booklists')
def rebuild_collections(self, booklist, oncard):
debug_print('PRS505: started rebuild_collections on card', oncard)
c = self.initialize_XML_cache()
c.rebuild_collections(booklist, {'carda':1, 'cardb':2}.get(oncard, 0))
c.write()
debug_print('PRS505: finished rebuild_collections')

View File

@@ -6,10 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time
from pprint import pprint
from base64 import b64decode
from uuid import uuid4
from lxml import etree
from calibre import prints, guess_type
@@ -62,8 +60,7 @@ class XMLCache(object):
def __init__(self, paths, prefixes, use_author_sort):
if DEBUG:
debug_print('Building XMLCache...')
pprint(paths)
debug_print('Building XMLCache...', paths)
self.paths = paths
self.prefixes = prefixes
self.use_author_sort = use_author_sort
@@ -147,38 +144,72 @@ class XMLCache(object):
if title+str(i) not in seen:
title = title+str(i)
playlist.set('title', title)
seen.add(title)
break
else:
seen.add(title)
def get_playlist_map(self):
debug_print('Start get_playlist_map')
ans = {}
def build_id_playlist_map(self, bl_index):
'''
Return a map of the collections in books: {lpaths: [collection names]}
'''
debug_print('Start build_id_playlist_map')
self.ensure_unique_playlist_titles()
debug_print('after ensure_unique_playlist_titles')
self.prune_empty_playlists()
debug_print('get_playlist_map loop')
for i, root in self.record_roots.items():
debug_print('get_playlist_map loop', i)
debug_print('after cleaning playlists')
root = self.record_roots[bl_index]
if root is None:
return
id_map = self.build_id_map(root)
ans[i] = []
playlist_map = {}
# for each playlist, get the lpaths for the ids in it, then add to dict
for playlist in root.xpath('//*[local-name()="playlist"]'):
items = []
name = playlist.get('title')
if name is None:
debug_print('build_id_playlist_map: unnamed playlist!')
continue
for item in playlist:
# translate each id into its lpath
id_ = item.get('id', None)
record = id_map.get(id_, None)
if record is not None:
items.append(record)
ans[i].append((playlist.get('title'), items))
debug_print('end get_playlist_map')
return ans
if id_ is None:
debug_print('build_id_playlist_map: id_ is None!')
continue
bk = id_map.get(id_, None)
if bk is None:
debug_print('build_id_playlist_map: book is None!', id_)
continue
lpath = bk.get('path', None)
if lpath is None:
debug_print('build_id_playlist_map: lpath is None!', id_)
continue
if lpath not in playlist_map:
playlist_map[lpath] = []
playlist_map[lpath].append(name)
debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
return playlist_map
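
The shape of the returned map, with hypothetical lpaths: keys are book paths relative to the device prefix, values the titles of the playlists (collections) that reference them. Per-book lookup then reduces to a dictionary get.

playlist_map = {'books/novel.epub': ['Fiction', 'To Read'],
                'books/manual.epub': ['Reference']}
# update_booklist() then simply does, per book:
#   book.device_collections = playlist_map.get(book.lpath, [])
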
def reset_existing_playlists_map(self):
'''
Call this method before calling get_or_create_playlist in the context of
a given job. Call it again after deleting any playlists. The current
implementation adds all new playlists before deleting any, so that
constraint is respected.
'''
self._playlist_to_playlist_id_map = {}
def get_or_create_playlist(self, bl_idx, title):
# maintain a private map of playlists to their ids. Don't check if it
# exists, because reset_existing_playlists_map must be called before it
# is used to ensure that deleted playlists are taken into account
root = self.record_roots[bl_idx]
if bl_idx not in self._playlist_to_playlist_id_map:
self._playlist_to_playlist_id_map[bl_idx] = {}
for playlist in root.xpath('//*[local-name()="playlist"]'):
if playlist.get('title', None) == title:
return playlist
if DEBUG:
pl_title = playlist.get('title', None)
if pl_title is not None:
self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
if title in self._playlist_to_playlist_id_map[bl_idx]:
return self._playlist_to_playlist_id_map[bl_idx][title]
debug_print('Creating playlist:', title)
ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
nsmap=root.nsmap, attrib={
@@ -188,11 +219,11 @@ class XMLCache(object):
'sourceid': '1'
})
root.append(ans)
self._playlist_to_playlist_id_map[bl_idx][title] = ans
return ans
# }}}
def fix_ids(self): # {{{
if DEBUG:
debug_print('Running fix_ids()')
def ensure_numeric_ids(root):
@@ -251,6 +282,8 @@ class XMLCache(object):
ensure_media_xml_base_ids(root)
idmap = ensure_numeric_ids(root)
if len(idmap) > 0:
debug_print('fix_ids: found some non-numeric ids')
remap_playlist_references(root, idmap)
if i == 0:
sourceid, playlist_sid = 1, 0
@@ -276,38 +309,19 @@ class XMLCache(object):
def update_booklist(self, bl, bl_index):
if bl_index not in self.record_roots:
return
if DEBUG:
debug_print('Updating JSON cache:', bl_index)
playlist_map = self.build_id_playlist_map(bl_index)
root = self.record_roots[bl_index]
pmap = self.get_playlist_map()[bl_index]
playlist_map = {}
for title, records in pmap:
for record in records:
path = record.get('path', None)
if path:
if path not in playlist_map:
playlist_map[path] = []
playlist_map[path].append(title)
lpath_map = self.build_lpath_map(root)
for book in bl:
record = lpath_map.get(book.lpath, None)
if record is not None:
title = record.get('title', None)
if title is not None and title != book.title:
if DEBUG:
debug_print('Renaming title', book.title, 'to', title)
book.title = title
# We shouldn't do this for Sonys, because the reader strips
# all but the first author.
# authors = record.get('author', None)
# if authors is not None:
# authors = string_to_authors(authors)
# if authors != book.authors:
# if DEBUG:
# prints('Renaming authors', book.authors, 'to',
# authors)
# book.authors = authors
# Don't set the author, because the reader strips all but
# the first author.
for thumbnail in record.xpath(
'descendant::*[local-name()="thumbnail"]'):
for img in thumbnail.xpath(
@@ -318,47 +332,57 @@ class XMLCache(object):
book.thumbnail = raw
break
break
if book.lpath in playlist_map:
tags = playlist_map[book.lpath]
book.device_collections = tags
book.device_collections = playlist_map.get(book.lpath, [])
debug_print('Finished updating JSON cache:', bl_index)
# }}}
# Update XML from JSON {{{
def update(self, booklists, collections_attributes):
debug_print('Starting update XML from JSON')
playlist_map = self.get_playlist_map()
debug_print('Starting update', collections_attributes)
for i, booklist in booklists.items():
if DEBUG:
playlist_map = self.build_id_playlist_map(i)
debug_print('Updating XML Cache:', i)
root = self.record_roots[i]
lpath_map = self.build_lpath_map(root)
gtz_count = ltz_count = 0
for book in booklist:
path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
# record = self.book_by_lpath(book.lpath, root)
record = lpath_map.get(book.lpath, None)
if record is None:
record = self.create_text_record(root, i, book.lpath)
self.update_text_record(record, book, path, i)
bl_pmap = playlist_map[i]
self.update_playlists(i, root, booklist, bl_pmap,
collections_attributes)
(gtz_count, ltz_count) = self.update_text_record(record, book,
path, i, gtz_count, ltz_count)
# Ensure the collections in the XML database are recorded for
# this book
if book.device_collections is None:
book.device_collections = []
book.device_collections = playlist_map.get(book.lpath, [])
debug_print('Timezone votes: %d GMT, %d LTZ'%(gtz_count, ltz_count))
self.update_playlists(i, root, booklist, collections_attributes)
# Update the device collections because update playlist could have added
# some new ones.
debug_print('In update/ Starting refresh of device_collections')
for i, booklist in booklists.items():
playlist_map = self.build_id_playlist_map(i)
for book in booklist:
book.device_collections = playlist_map.get(book.lpath, [])
self.fix_ids()
debug_print('Finished update')
def rebuild_collections(self, booklist, bl_index):
if bl_index not in self.record_roots:
return
root = self.record_roots[bl_index]
self.update_playlists(bl_index, root, booklist, [])
self.fix_ids()
# This is needed to update device_collections
for i, booklist in booklists.items():
self.update_booklist(booklist, i)
debug_print('Finished update XML from JSON')
def update_playlists(self, bl_index, root, booklist, playlist_map,
collections_attributes):
debug_print('Starting update_playlists')
def update_playlists(self, bl_index, root, booklist, collections_attributes):
debug_print('Starting update_playlists', collections_attributes, bl_index)
self.reset_existing_playlists_map()
collections = booklist.get_collections(collections_attributes)
lpath_map = self.build_lpath_map(root)
debug_print('update_playlists: finished building maps')
for category, books in collections.items():
records = [lpath_map.get(b.lpath, None) for b in books]
# Remove any books that were not found, although this
@@ -367,25 +391,34 @@ class XMLCache(object):
debug_print('WARNING: Some elements in the JSON cache were not'
' found in the XML cache')
records = [x for x in records if x is not None]
# Ensure each book has an ID.
for rec in records:
if rec.get('id', None) is None:
rec.set('id', str(self.max_id(root)+1))
ids = [x.get('id', None) for x in records]
# Given that we set the ids, there shouldn't be any None's. But
# better to be safe...
if None in ids:
if DEBUG:
debug_print('WARNING: Some <text> elements do not have ids')
ids = [x for x in ids if x is not None]
playlist = self.get_or_create_playlist(bl_index, category)
# Get the books currently in the playlist. We will need them to be
# sure to put back any books that were manually added.
playlist_ids = []
for item in playlist:
id_ = item.get('id', None)
if id_ is not None:
playlist_ids.append(id_)
# Empty the playlist. We do this so that the playlist will have the
# order specified by get_collections
for item in list(playlist):
playlist.remove(item)
# Get a list of ids not known by get_collections
extra_ids = [x for x in playlist_ids if x not in ids]
# Rebuild the collection in the order specified by get_collections. Then
# add the ids that get_collections didn't know about.
for id_ in ids + extra_ids:
item = playlist.makeelement(
'{%s}item'%self.namespaces[bl_index],
@@ -423,11 +456,38 @@ class XMLCache(object):
root.append(ans)
return ans
def update_text_record(self, record, book, path, bl_index):
def update_text_record(self, record, book, path, bl_index, gtz_count, ltz_count):
'''
Update the Sony database from the book. This is done if the timestamp in
the db differs from the timestamp on the file.
'''
# It seems that a Sony device can sometimes know what timezone it is in,
# and apparently converts the dates to GMT when it writes them to the
# db. Unfortunately, we can't tell when it does this, so we use a
# horrible heuristic. First, set dates only for new books, trying to
# avoid upsetting the sony. Use the timezone determined through the
# voting described next. Second, voting: if a book is not new, compare
# its Sony DB date against localtime and gmtime. Count the matches. When
# we must set a date, use the one with the most matches. Use localtime
# in the case of a tie, and hope it is right.
timestamp = os.path.getmtime(path)
date = strftime(timestamp)
if date != record.get('date', None):
rec_date = record.get('date', None)
if not getattr(book, '_new_book', False): # book is not new
if strftime(timestamp, zone=time.gmtime) == rec_date:
gtz_count += 1
elif strftime(timestamp, zone=time.localtime) == rec_date:
ltz_count += 1
else: # book is new. Set the time using the current votes
if ltz_count >= gtz_count:
tz = time.localtime
debug_print("Using localtime TZ for new book", book.lpath)
else:
tz = time.gmtime
debug_print("Using GMT TZ for new book", book.lpath)
date = strftime(timestamp, zone=tz)
record.set('date', date)
record.set('size', str(os.stat(path).st_size))
title = book.title if book.title else _('Unknown')
record.set('title', title)
@@ -452,6 +512,7 @@ class XMLCache(object):
if 'id' not in record.attrib:
num = self.max_id(record.getroottree().getroot())
record.set('id', str(num+1))
return (gtz_count, ltz_count)
# }}}
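
A worked sketch of the timezone voting above, with invented timestamps: each book already on the device votes for whichever zone reproduces the date string the Sony db holds, and a new book adopts the majority zone, with ties going to localtime. The render() helper stands in for strftime(timestamp, zone=...).

import time

def render(ts, zone):
    # stand-in for the strftime(timestamp, zone=...) used above
    return time.strftime('%Y-%m-%d %H:%M:%S', zone(ts))

gtz_count = ltz_count = 0
# (file mtime, date string found in the Sony db) for existing books
existing = [(1278950400, render(1278950400, time.gmtime)),
            (1278954000, render(1278954000, time.localtime))]
for ts, rec_date in existing:
    if render(ts, time.gmtime) == rec_date:
        gtz_count += 1
    elif render(ts, time.localtime) == rec_date:
        ltz_count += 1
# a new book now gets its date rendered in the winning zone
tz = time.localtime if ltz_count >= gtz_count else time.gmtime
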
# Writing the XML files {{{
@@ -544,10 +605,5 @@ class XMLCache(object):
break
self.namespaces[i] = ns
# if DEBUG:
# debug_print('Found nsmaps:')
# pprint(self.nsmaps)
# debug_print('Found namespaces:')
# pprint(self.namespaces)
# }}}

View File

@@ -98,6 +98,9 @@ class LinuxScanner(object):
def __call__(self):
ans = set([])
if not self.ok:
raise RuntimeError('DeviceScanner requires the /sys filesystem to work.')
for x in os.listdir(self.base):
base = os.path.join(self.base, x)
ven = os.path.join(base, 'idVendor')
@@ -145,8 +148,6 @@ class DeviceScanner(object):
def __init__(self, *args):
if isosx and osx_scanner is None:
raise RuntimeError('The Python extension usbobserver must be available on OS X.')
if islinux and not linux_scanner.ok:
raise RuntimeError('DeviceScanner requires the /sys filesystem to work.')
self.scanner = win_scanner if iswindows else osx_scanner if isosx else linux_scanner
self.devices = []

View File

@@ -11,10 +11,11 @@ from calibre.devices.mime import mime_type_ext
from calibre.devices.interface import BookList as _BookList
from calibre.constants import filesystem_encoding, preferred_encoding
from calibre import isbytestring
from calibre.utils.config import prefs
class Book(MetaInformation):
BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections']
BOOK_ATTRS = ['lpath', 'size', 'mime', 'device_collections', '_new_book']
JSON_ATTRS = [
'lpath', 'title', 'authors', 'mime', 'size', 'tags', 'author_sort',
@@ -29,6 +30,7 @@ class Book(MetaInformation):
MetaInformation.__init__(self, '')
self._new_book = False
self.device_collections = []
self.path = os.path.join(prefix, lpath)
if os.sep == '\\':
@@ -76,7 +78,7 @@ class Book(MetaInformation):
in C{other} takes precedence, unless the information in C{other} is NULL.
'''
MetaInformation.smart_update(self, other)
MetaInformation.smart_update(self, other, replace_tags=True)
for attr in self.BOOK_ATTRS:
if hasattr(other, attr):
@@ -130,12 +132,37 @@ class CollectionsBookList(BookList):
return True
def get_collections(self, collection_attributes):
from calibre.devices.usbms.driver import debug_print
debug_print('Starting get_collections:', prefs['manage_device_metadata'])
collections = {}
series_categories = set([])
collection_attributes = list(collection_attributes)+['device_collections']
for attr in collection_attributes:
attr = attr.strip()
# This map of sets is used to avoid linear searches when testing for
# book equality
collections_lpaths = {}
for book in self:
# Make sure we can identify this book via the lpath
lpath = getattr(book, 'lpath', None)
if lpath is None:
continue
# Decide how we will build the collections. The default: leave the
# book in all existing collections. Do not add any new ones.
attrs = ['device_collections']
if getattr(book, '_new_book', False):
if prefs['manage_device_metadata'] == 'manual':
# Ensure that the book is in all the book's existing
# collections plus all metadata collections
attrs += collection_attributes
else:
# For new books, both 'on_send' and 'on_connect' do the same
# thing. The book's existing collections are ignored. Put
# the book in collections defined by its metadata.
attrs = collection_attributes
elif prefs['manage_device_metadata'] == 'on_connect':
# For existing books, modify the collections only if the user
# specified 'on_connect'
attrs = collection_attributes
for attr in attrs:
attr = attr.strip()
val = getattr(book, attr, None)
if not val: continue
if isbytestring(val):
@@ -150,11 +177,12 @@ class CollectionsBookList(BookList):
continue
if category not in collections:
collections[category] = []
if book not in collections[category]:
collections_lpaths[category] = set()
if lpath not in collections_lpaths[category]:
collections_lpaths[category].add(lpath)
collections[category].append(book)
if attr == 'series':
series_categories.add(category)
# Sort collections
for category, books in collections.items():
def tgetter(x):
@@ -167,3 +195,15 @@ class CollectionsBookList(BookList):
books.sort(cmp=lambda x,y:cmp(getter(x), getter(y)))
return collections
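
The branch logic above, restated as a small helper to make the policy explicit (the helper name is illustrative, not part of the class): which attributes feed a book's collections depends on whether the book is new and on the manage_device_metadata preference.

def attrs_for(book_is_new, policy, collection_attributes):
    # mirrors the decision in get_collections() above
    if book_is_new:
        if policy == 'manual':
            return ['device_collections'] + list(collection_attributes)
        return list(collection_attributes)   # 'on_send' and 'on_connect' agree
    if policy == 'on_connect':
        return list(collection_attributes)
    return ['device_collections']            # existing books: leave as-is

assert attrs_for(False, 'on_send', ['tags']) == ['device_collections']
assert attrs_for(True, 'manual', ['tags']) == ['device_collections', 'tags']
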
def rebuild_collections(self, booklist, oncard):
'''
For each book in the booklist for the card oncard, remove it from all
its current collections, then add it to the collections specified in
device_collections.
oncard is None for the main memory, carda for card A, cardb for card B,
etc.
booklist is the object created by the :method:`books` call above.
'''
pass

View File

@@ -78,9 +78,6 @@ class Device(DeviceConfig, DevicePlugin):
STORAGE_CARD_VOLUME_LABEL = ''
STORAGE_CARD2_VOLUME_LABEL = None
SUPPORTS_SUB_DIRS = False
MUST_READ_METADATA = False
SUPPORTS_USE_AUTHOR_SORT = False
EBOOK_DIR_MAIN = ''
EBOOK_DIR_CARD_A = ''
@@ -735,7 +732,7 @@ class Device(DeviceConfig, DevicePlugin):
traceback.print_exc()
self._main_prefix = self._card_a_prefix = self._card_b_prefix = None
def get_main_ebook_dir(self):
def get_main_ebook_dir(self, for_upload=False):
return self.EBOOK_DIR_MAIN
def _sanity_check(self, on_card, files):
@@ -753,7 +750,7 @@ class Device(DeviceConfig, DevicePlugin):
path = os.path.join(self._card_b_prefix,
*(self.EBOOK_DIR_CARD_B.split('/')))
else:
candidates = self.get_main_ebook_dir()
candidates = self.get_main_ebook_dir(for_upload=True)
if isinstance(candidates, basestring):
candidates = [candidates]
candidates = [

View File

@@ -13,6 +13,10 @@ class DeviceConfig(object):
EXTRA_CUSTOMIZATION_MESSAGE = None
EXTRA_CUSTOMIZATION_DEFAULT = None
SUPPORTS_SUB_DIRS = False
MUST_READ_METADATA = False
SUPPORTS_USE_AUTHOR_SORT = False
#: If None the default is used
SAVE_TEMPLATE = None
@@ -23,9 +27,14 @@ class DeviceConfig(object):
config().parse().send_template
@classmethod
def _config(cls):
def _config_base_name(cls):
klass = cls if isinstance(cls, type) else cls.__class__
c = Config('device_drivers_%s' % klass.__name__, _('settings for device drivers'))
return klass.__name__
@classmethod
def _config(cls):
name = cls._config_base_name()
c = Config('device_drivers_%s' % name, _('settings for device drivers'))
c.add_opt('format_map', default=cls.FORMATS,
help=_('Ordered list of formats the device will accept'))
c.add_opt('use_subdirs', default=True,

View File

@@ -58,7 +58,7 @@ class USBMS(CLI, Device):
debug_print ('USBMS: Fetching list of books from device. oncard=', oncard)
dummy_bl = BookList(None, None, None)
dummy_bl = self.booklist_class(None, None, None)
if oncard == 'carda' and not self._card_a_prefix:
self.report_progress(1.0, _('Getting list of books on device...'))
@@ -78,6 +78,8 @@ class USBMS(CLI, Device):
self.EBOOK_DIR_CARD_B if oncard == 'cardb' else \
self.get_main_ebook_dir()
debug_print ('USBMS: dirs are:', prefix, ebook_dirs)
# get the metadata cache
bl = self.booklist_class(oncard, prefix, self.settings)
need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE)
@@ -233,6 +235,7 @@ class USBMS(CLI, Device):
book = self.book_class(prefix, lpath, other=info)
if book.size is None:
book.size = os.stat(self.normalize_path(path)).st_size
book._new_book = True # Must be before add_book
booklists[blist].add_book(book, replace_metadata=True)
self.report_progress(1.0, _('Adding books to device metadata listing...'))
debug_print('USBMS: finished adding metadata')
@@ -273,6 +276,9 @@ class USBMS(CLI, Device):
self.report_progress(1.0, _('Removing books from device metadata listing...'))
debug_print('USBMS: finished removing metadata for %d books'%(len(paths)))
# If you override this method and you use book._new_book, then you must
# complete the processing before you call this method. The flag is cleared
# at the end just before the return
def sync_booklists(self, booklists, end_session=True):
debug_print('USBMS: starting sync_booklists')
@@ -286,11 +292,18 @@ class USBMS(CLI, Device):
js = [item.to_json() for item in booklists[listid] if
hasattr(item, 'to_json')]
with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:
json.dump(js, f, indent=2, encoding='utf-8')
f.write(json.dumps(js, indent=2, encoding='utf-8'))
write_prefix(self._main_prefix, 0)
write_prefix(self._card_a_prefix, 1)
write_prefix(self._card_b_prefix, 2)
# Clear the _new_book indication, as we are supposed to be done with
# adding books at this point
for blist in booklists:
if blist is not None:
for book in blist:
book._new_book = False
self.report_progress(1.0, _('Sending metadata to device...'))
debug_print('USBMS: finished sync_booklists')

View File

@@ -49,7 +49,6 @@ class CHMInput(InputFormatPlugin):
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname)
#raw_input()
metadata = get_metadata_from_reader(self._chm_reader)
@@ -92,7 +91,7 @@ class CHMInput(InputFormatPlugin):
metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
if not metadata.language:
oeb.logger.warn(u'Language not specified')
metadata.add('language', get_lang())
metadata.add('language', get_lang().replace('_', '-'))
if not metadata.creator:
oeb.logger.warn('Creator not specified')
metadata.add('creator', _('Unknown'))
@@ -141,10 +140,9 @@ class CHMInput(InputFormatPlugin):
log.debug('Found %d section nodes' % len(chapters))
htmlpath = os.path.splitext(hhcpath)[0] + ".html"
f = open(htmlpath, 'wb')
if chapters:
f.write('<html><head><meta http-equiv="Content-type"'
' content="text/html;charset=UTF-8" /></head><body>\n')
if chapters:
path0 = chapters[0][1]
subpath = os.path.dirname(path0)
@@ -159,6 +157,8 @@ class CHMInput(InputFormatPlugin):
f.write(url)
f.write("</body></html>")
else:
f.write(hhcdata)
f.close()
return htmlpath

View File

@@ -8,7 +8,7 @@ import os, re
from mimetypes import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
from calibre.constants import iswindows
from calibre.constants import iswindows, filesystem_encoding
from calibre.utils.chm.chm import CHMFile
from calibre.utils.chm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -78,6 +78,8 @@ class CHMError(Exception):
class CHMReader(CHMFile):
def __init__(self, input, log):
CHMFile.__init__(self)
if isinstance(input, unicode):
input = input.encode(filesystem_encoding)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
@@ -91,7 +93,6 @@ class CHMReader(CHMFile):
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc"
def _parse_toc(self, ul, basedir=os.getcwdu()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1
@@ -152,6 +153,8 @@ class CHMReader(CHMFile):
if f.lower() == self.hhc_path.lower():
self.hhc_path = f
break
if self.hhc_path not in files and files:
self.hhc_path = files[0]
def _reformat(self, data):
try:
@@ -159,7 +162,7 @@ class CHMReader(CHMFile):
soup = BeautifulSoup(data)
except ValueError:
# hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :("
self.log.exception("Unable to parse html for cleaning, leaving it")
return data
# nuke javascript...
[s.extract() for s in soup('script')]

View File

@@ -151,6 +151,7 @@ cpalmdoc_do_compress(buffer *b, char *output) {
for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j];
}
}
PyMem_Free(temp.data);
return output - head;
}
@@ -168,7 +169,9 @@ cpalmdoc_compress(PyObject *self, PyObject *args) {
for (j = 0; j < input_len; j++)
b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
b.len = input_len;
output = (char *)PyMem_Malloc(sizeof(char) * b.len);
// Make the output buffer larger than the input as sometimes
// compression results in a larger block
output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len));
if (output == NULL) return PyErr_NoMemory();
j = cpalmdoc_do_compress(&b, output);
if ( j == 0) return PyErr_NoMemory();

View File

@@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
LIGATURES = {
u'\u00c6': u'AE',
u'\u00e6': u'ae',
u'\u0152': u'OE',
u'\u0153': u'oe',
u'\u0132': u'IJ',
u'\u0133': u'ij',
u'\u1D6B': u'ue',
# u'\u00c6': u'AE',
# u'\u00e6': u'ae',
# u'\u0152': u'OE',
# u'\u0153': u'oe',
# u'\u0132': u'IJ',
# u'\u0133': u'ij',
# u'\u1D6B': u'ue',
u'\uFB00': u'ff',
u'\uFB01': u'fi',
u'\uFB02': u'fl',
@@ -107,9 +107,21 @@ class CSSPreProcessor(object):
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
def __call__(self, data):
def __call__(self, data, add_namespace=False):
from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
data = self.PAGE_PAT.sub('', data)
if not add_namespace:
return data
ans, namespaced = [], False
for line in data.splitlines():
ll = line.lstrip()
if not (namespaced or ll.startswith('@import') or
ll.startswith('@charset')):
ans.append(XHTML_CSS_NAMESPACE.strip())
namespaced = True
ans.append(line)
return u'\n'.join(ans)
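
An illustrative before/after for the namespacing pass above. CSS requires @namespace rules to follow @charset and @import, which is why the insertion is deferred past those lines; the @namespace text shown is an approximation of what XHTML_CSS_NAMESPACE contains.

# input:                         output of __call__(data, add_namespace=True):
#   @charset "utf-8";              @charset "utf-8";
#   @import url(base.css);         @import url(base.css);
#   p { margin: 0 }                @namespace "http://www.w3.org/1999/xhtml";
#                                  p { margin: 0 }
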
class HTMLPreProcessor(object):
@@ -268,7 +280,7 @@ class HTMLPreProcessor(object):
if getattr(self.extra_opts, 'remove_footer', None):
try:
rules.insert(0
rules.insert(0,
(re.compile(self.extra_opts.footer_regex), lambda match : '')
)
except:

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize import Plugin
class InvalidEpub(ValueError):
pass
class ePubFixer(Plugin):
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal'
type = _('ePub Fixer')
can_be_disabled = True
# API that subclasses must implement {{{
@property
def short_description(self):
raise NotImplementedError
@property
def long_description(self):
raise NotImplementedError
@property
def fix_name(self):
raise NotImplementedError
@property
def options(self):
'''
Return a list of 4-tuples
(option_name, type, default, help_text)
type is one of 'bool', 'int', 'string'
'''
return []
def run(self, container, opts, log, fix=False):
raise NotImplementedError
# }}}
def add_options_to_parser(self, parser):
parser.add_option('--' + self.fix_name.replace('_', '-'),
help=self.long_description, action='store_true', default=False)
for option in self.options:
action = 'store'
if option[1] == 'bool':
action = 'store_true'
kwargs = {'action': action, 'default':option[2], 'help':option[3]}
if option[1] != 'bool':
kwargs['type'] = option[1]
parser.add_option('--'+option[0].replace('_', '-'), **kwargs)
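
A hypothetical minimal subclass, to show the shape of the API above; the class, fix_name and option are invented for illustration and do not ship with calibre. A real fixer would also need to be registered with calibre's plugin system so that epub_fixers() picks it up.

class ExampleFixer(ePubFixer):

    name = 'Example fixer'

    @property
    def short_description(self):
        return 'Example fixer'

    @property
    def long_description(self):
        return 'Illustrates the ePubFixer API; performs no changes.'

    @property
    def fix_name(self):
        return 'example_fix'

    @property
    def options(self):
        # (option_name, type, default, help_text)
        return [('example_limit', 'int', 10, 'An illustrative option')]

    def run(self, container, opts, log, fix=False):
        log('example fixer ran; fix=%r limit=%r' % (fix, opts.example_limit))
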

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, posixpath, urllib, sys, re
from lxml import etree
from calibre.ebooks.epub.fix import InvalidEpub
from calibre import guess_type, prepare_string_for_xml
from calibre.ebooks.chardet import xml_to_unicode
from calibre.constants import iswindows
from calibre.utils.zipfile import ZipFile, ZIP_STORED
exists, join = os.path.exists, os.path.join
OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'
class Container(object):
META_INF = {
'container.xml' : True,
'manifest.xml' : False,
'encryption.xml' : False,
'metadata.xml' : False,
'signatures.xml' : False,
'rights.xml' : False,
}
def __init__(self, path, log):
self.root = os.path.abspath(path)
self.log = log
self.dirtied = set([])
self.cache = {}
self.mime_map = {}
if exists(join(self.root, 'mimetype')):
os.remove(join(self.root, 'mimetype'))
container_path = join(self.root, 'META-INF', 'container.xml')
if not exists(container_path):
raise InvalidEpub('No META-INF/container.xml in epub')
self.container = etree.fromstring(open(container_path, 'rb').read())
opf_files = self.container.xpath((
r'child::ocf:rootfiles/ocf:rootfile'
'[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
), namespaces={'ocf':OCF_NS}
)
if not opf_files:
raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
opf_path = os.path.join(self.root,
*opf_files[0].get('full-path').split('/'))
if not exists(opf_path):
raise InvalidEpub('OPF file does not exist at location pointed to'
' by META-INF/container.xml')
# Map of relative paths with / separators to absolute
# paths on filesystem with os separators
self.name_map = {}
for dirpath, dirnames, filenames in os.walk(self.root):
for f in filenames:
path = join(dirpath, f)
name = os.path.relpath(path, self.root).replace(os.sep, '/')
self.name_map[name] = path
if path == opf_path:
self.opf_name = name
self.mime_map[name] = guess_type('a.opf')[0]
for item in self.opf.xpath(
'//opf:manifest/opf:item[@href and @media-type]',
namespaces={'opf':OPF_NS}):
href = item.get('href')
self.mime_map[self.href_to_name(href,
posixpath.dirname(self.opf_name))] = item.get('media-type')
def manifest_worthy_names(self):
for name in self.name_map:
if name.endswith('.opf'): continue
if name.startswith('META-INF') and \
posixpath.basename(name) in self.META_INF: continue
yield name
def delete_name(self, name):
self.mime_map.pop(name, None)
path = self.name_map[name]
os.remove(path)
self.name_map.pop(name)
def manifest_item_for_name(self, name):
href = self.name_to_href(name,
posixpath.dirname(self.opf_name))
q = prepare_string_for_xml(href, attribute=True)
existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
namespaces={'opf':OPF_NS})
if not existing:
return None
return existing[0]
def add_name_to_manifest(self, name):
item = self.manifest_item_for_name(name)
if item is not None:
return
manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
id=self.generate_manifest_id())
mt = guess_type(posixpath.basename(name))[0]
if not mt:
mt = 'application/octet-stream'
item.set('media-type', mt)
manifest.append(item)
def generate_manifest_id(self):
items = self.opf.xpath('//opf:manifest/opf:item[@id]',
namespaces={'opf':OPF_NS})
ids = set([x.get('id') for x in items])
for x in xrange(sys.maxint):
c = 'id%d'%x
if c not in ids:
return c
@property
def opf(self):
return self.get(self.opf_name)
def href_to_name(self, href, base=''):
href = urllib.unquote(href.partition('#')[0])
name = href
if base:
name = posixpath.join(base, href)
return name
def name_to_href(self, name, base):
if not base:
return name
return posixpath.relpath(name, base)
def get_raw(self, name):
path = self.name_map[name]
return open(path, 'rb').read()
def get(self, name):
if name in self.cache:
return self.cache[name]
raw = self.get_raw(name)
if name in self.mime_map:
raw = self._parse(raw, self.mime_map[name])
self.cache[name] = raw
return raw
def set(self, name, val):
self.cache[name] = val
self.dirtied.add(name)
def _parse(self, raw, mimetype):
mt = mimetype.lower()
if mt.endswith('+xml'):
parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
raw = xml_to_unicode(raw,
strip_encoding_pats=True, assume_utf8=True,
resolve_entities=True)[0].strip()
idx = raw.find('<html')
if idx == -1:
idx = raw.find('<HTML')
if idx > -1:
pre = raw[:idx]
raw = raw[idx:]
if '<!DOCTYPE' in pre:
user_entities = {}
for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
val = match.group(2)
if val.startswith('"') and val.endswith('"'):
val = val[1:-1]
user_entities[match.group(1)] = val
if user_entities:
pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
return etree.fromstring(raw, parser=parser)
return raw
def write(self, path):
for name in self.dirtied:
data = self.cache[name]
raw = data
if hasattr(data, 'xpath'):
raw = etree.tostring(data, encoding='utf-8',
xml_declaration=True)
with open(self.name_map[name], 'wb') as f:
f.write(raw)
self.dirtied.clear()
zf = ZipFile(path, 'w')
zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
compression=ZIP_STORED)
zf.add_dir(self.root)
zf.close()

View File

@@ -0,0 +1,82 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
from calibre.utils.date import parse_date, strptime
class Epubcheck(ePubFixer):
name = 'Workaround epubcheck bugs'
@property
def short_description(self):
return _('Workaround epubcheck bugs')
@property
def long_description(self):
return _('Workarounds for bugs in the latest release of epubcheck. '
'epubcheck reports many things as errors that are not '
'actually errors. epub-fix will try to detect these and replace '
'them with constructs that epubcheck likes. This may cause '
'significant changes to your epub; complain to the epubcheck '
'project.')
@property
def fix_name(self):
return 'epubcheck'
def fix_pubdates(self):
dirtied = False
opf = self.container.opf
for dcdate in opf.xpath('//dc:date',
namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
raw = dcdate.text
if not raw: raw = ''
default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
try:
ts = parse_date(raw, assume_utc=False, as_utc=True,
default=default)
except:
raise InvalidEpub('Invalid date set in OPF', raw)
sval = ts.strftime('%Y-%m-%d')
if sval != raw:
self.log.error(
'OPF contains date', raw, 'that epubcheck does not like')
if self.fix:
dcdate.text = sval
self.log('\tReplaced', raw, 'with', sval)
dirtied = True
if dirtied:
self.container.set(self.container.opf_name, opf)
def fix_preserve_aspect_ratio(self):
for name in self.container.name_map:
mt = self.container.mime_map.get(name, '')
if mt.lower() == 'application/xhtml+xml':
root = self.container.get(name)
dirtied = False
for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
namespaces={'svg':'http://www.w3.org/2000/svg'}):
self.log.error('Found <svg> element with'
' preserveAspectRatio="none" which epubcheck '
'cannot handle')
if self.fix:
svg.set('preserveAspectRatio', 'xMidYMid meet')
dirtied = True
self.log('\tReplaced none with xMidYMid meet')
if dirtied:
self.container.set(name, root)
def run(self, container, opts, log, fix=False):
self.container = container
self.opts = opts
self.log = log
self.fix = fix
self.fix_pubdates()
self.fix_preserve_aspect_ratio()

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os
from calibre.utils.config import OptionParser
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.zipfile import ZipFile
from calibre.utils.logging import default_log
from calibre.customize.ui import epub_fixers
from calibre.ebooks.epub.fix.container import Container
def option_parser():
parser = OptionParser(usage=_(
'%prog [options] file.epub\n\n'
'Fix common problems in EPUB files that can cause them '
'to be rejected by poorly designed publishing services.\n\n'
'By default, no fixing is done and messages are printed out '
'for each error detected. Use the options to control which errors '
'are automatically fixed.'))
for fixer in epub_fixers():
fixer.add_options_to_parser(parser)
return parser
def run(epub, opts, log):
with TemporaryDirectory('_epub-fix') as tdir:
with CurrentDir(tdir):
zf = ZipFile(epub)
zf.extractall()
zf.close()
container = Container(tdir, log)
for fixer in epub_fixers():
fix = getattr(opts, fixer.fix_name, False)
fixer.run(container, opts, log, fix=fix)
container.write(epub)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
default_log.error(_('You must specify an epub file'))
return
epub = os.path.abspath(args[1])
run(epub, opts, default_log)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,49 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.epub.fix import ePubFixer
class Unmanifested(ePubFixer):
name = 'Fix unmanifested files'
@property
def short_description(self):
return _('Fix unmanifested files')
@property
def long_description(self):
return _('Fix unmanifested files. epub-fix can either add them to '
'the manifest or delete them as specified by the '
'delete unmanifested option.')
@property
def fix_name(self):
return 'unmanifested'
@property
def options(self):
return [('delete_unmanifested', 'bool', False,
_('Delete unmanifested files instead of adding them to the manifest'))]
def run(self, container, opts, log, fix=False):
dirtied = False
for name in list(container.manifest_worthy_names()):
item = container.manifest_item_for_name(name)
if item is None:
log.error(name, 'not in manifest')
if fix:
if opts.delete_unmanifested:
container.delete_name(name)
log('\tDeleted')
else:
container.add_name_to_manifest(name)
log('\tAdded to manifest')
dirtied = True
if dirtied:
container.set(container.opf_name, container.opf)

View File

@@ -380,10 +380,9 @@ class EPUBOutput(OutputFormatPlugin):
sel = '.'+lb.get('class')
for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
if sel == rule.selectorList.selectorText:
val = rule.style.removeProperty('margin-left')
pval = rule.style.getProperty('padding-left')
if val and not pval:
rule.style.setProperty('padding-left', val)
rule.style.removeProperty('margin-left')
# padding-left breaks rendering in webkit and gecko
rule.style.removeProperty('padding-left')
# }}}

View File

@@ -20,7 +20,7 @@ from itertools import izip
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux, isfreebsd
from calibre.constants import islinux, isfreebsd, iswindows
from calibre import unicode_path
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
@@ -32,9 +32,14 @@ class Link(object):
@classmethod
def url_to_local_path(cls, url, base):
path = urlunparse(('', '', url.path, url.params, url.query, ''))
path = url.path
isabs = False
if iswindows and path.startswith('/'):
path = path[1:]
isabs = True
path = urlunparse(('', '', path, url.params, url.query, ''))
path = unquote(path)
if os.path.isabs(path):
if isabs or os.path.isabs(path):
return path
return os.path.abspath(os.path.join(base, path))
@@ -307,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
xpath
from calibre import guess_type
import cssutils
self.OEB_STYLES = OEB_STYLES
oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False)
self.oeb = oeb
@@ -323,7 +329,7 @@ class HTMLInput(InputFormatPlugin):
metadata.add('identifier', mi.isbn, attrib={'scheme':'ISBN'})
if not metadata.language:
oeb.logger.warn(u'Language not specified')
metadata.add('language', get_lang())
metadata.add('language', get_lang().replace('_', '-'))
if not metadata.creator:
oeb.logger.warn('Creator not specified')
metadata.add('creator', self.oeb.translate(__('Unknown')))
@@ -371,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
for item in oeb.manifest.values():
if item.media_type in OEB_STYLES:
if item.media_type in self.OEB_STYLES:
dpath = None
for path, href in self.added_resources.items():
if href == item.href:
@@ -409,12 +415,30 @@ class HTMLInput(InputFormatPlugin):
oeb.container = DirContainer(os.getcwdu(), oeb.log)
return oeb
def link_to_local_path(self, link_, base=None):
if not isinstance(link_, unicode):
try:
link_ = link_.decode('utf-8', 'strict')
except:
self.log.warn('Failed to decode link %r. Ignoring'%link_)
return None, None
try:
l = Link(link_, base if base else os.getcwdu())
except:
self.log.exception('Failed to process link: %r'%link_)
return None, None
if l.path is None:
# Not a local resource
return None, None
link = l.path.replace('/', os.sep).strip()
frag = l.fragment
if not link:
return None, None
return link, frag
def resource_adder(self, link_, base=None):
link = self.urlnormalize(link_)
link, frag = self.urldefrag(link)
link = unquote(link).replace('/', os.sep)
if not link.strip():
link, frag = self.link_to_local_path(link_, base=base)
if link is None:
return link_
try:
if base and not os.path.isabs(link):
@@ -442,6 +466,9 @@ class HTMLInput(InputFormatPlugin):
item = self.oeb.manifest.add(id, href, media_type)
item.html_input_href = bhref
if guessed in self.OEB_STYLES:
item.override_css_fetch = partial(
self.css_import_handler, os.path.dirname(link))
item.data
self.added_resources[link] = href
@@ -450,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
nlink = '#'.join((nlink, frag))
return nlink
def css_import_handler(self, base, href):
link, frag = self.link_to_local_path(href, base=base)
if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
return (None, None)
try:
raw = open(link, 'rb').read().decode('utf-8', 'replace')
raw = self.oeb.css_preprocessor(raw, add_namespace=True)
except:
self.log.exception('Failed to read CSS file: %r'%link)
return (None, None)
return (None, raw)

View File

@@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
xml = d.to_xml(write_files=True)
if options.verbose > 2:
open('lrs.xml', 'wb').write(xml.encode('utf-8'))
parser = etree.XMLParser(recover=True, no_network=True)
parser = etree.XMLParser(no_network=True, huge_tree=True)
doc = etree.fromstring(xml, parser=parser)
char_button_map = {}
for x in doc.xpath('//CharButton[@refobj]'):

View File

@@ -870,7 +870,7 @@ class Text(LRFStream):
open_containers = collections.deque()
for c in self.content:
if isinstance(c, basestring):
s += prepare_string_for_xml(c)
s += prepare_string_for_xml(c).replace('\0', '')
elif c is None:
if open_containers:
p = open_containers.pop()

View File

@@ -268,7 +268,7 @@ class MetaInformation(object):
):
prints(x, getattr(self, x, 'None'))
def smart_update(self, mi):
def smart_update(self, mi, replace_tags=False):
'''
Merge the information in C{mi} into self. In case of conflicts, the information
in C{mi} takes precedence, unless the information in mi is NULL.
@@ -282,7 +282,7 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer',
'cover', 'guide', 'book_producer',
'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
'publication_type', 'uuid'):
if hasattr(mi, attr):
@@ -291,6 +291,9 @@ class MetaInformation(object):
setattr(self, attr, val)
if mi.tags:
if replace_tags:
self.tags = mi.tags
else:
self.tags += mi.tags
self.tags = list(set(self.tags))
@@ -314,6 +317,11 @@ class MetaInformation(object):
if len(other_comments.strip()) > len(my_comments.strip()):
self.comments = other_comments
other_lang = getattr(mi, 'language', None)
if other_lang and other_lang.lower() != 'und':
self.language = other_lang
def format_series_index(self):
try:
x = float(self.series_index)

View File

@@ -0,0 +1,15 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize import Plugin
class CoverDownload(Plugin):
supported_platforms = ['windows', 'osx', 'linux']
author = 'Kovid Goyal'
type = _('Cover download')

View File

@@ -15,7 +15,6 @@ from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.date import parse_date, utcnow
DOUBAN_API_KEY = None
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
@@ -35,13 +34,15 @@ date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
tag = XPath("descendant::db:tag")
CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
class DoubanBooks(MetadataSource):
name = 'Douban Books'
description = _('Downloads metadata from Douban.com')
supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
version = (1, 0, 0) # The version number of this plugin
version = (1, 0, 1) # The version number of this plugin
def fetch(self):
try:
@@ -65,7 +66,7 @@ class Query(object):
type = "search"
def __init__(self, title=None, author=None, publisher=None, isbn=None,
max_results=20, start_index=1):
max_results=20, start_index=1, api_key=''):
assert not(title is None and author is None and publisher is None and \
isbn is None)
assert (int(max_results) < 21)
@@ -89,16 +90,16 @@ class Query(object):
if self.type == "isbn":
self.url = self.ISBN_URL + q
if DOUBAN_API_KEY is not None:
self.url = self.url + "?apikey=" + DOUBAN_API_KEY
if api_key != '':
self.url = self.url + "?apikey=" + api_key
else:
self.url = self.SEARCH_URL+urlencode({
'q':q,
'max-results':max_results,
'start-index':start_index,
})
if DOUBAN_API_KEY is not None:
self.url = self.url + "&apikey=" + DOUBAN_API_KEY
if api_key != '':
self.url = self.url + "&apikey=" + api_key
def __call__(self, browser, verbose):
if verbose:
@@ -177,7 +178,7 @@ class ResultList(list):
d = None
return d
def populate(self, entries, browser, verbose=False):
def populate(self, entries, browser, verbose=False, api_key=''):
for x in entries:
try:
id_url = entry_id(x)[0].text
@@ -186,8 +187,8 @@ class ResultList(list):
report(verbose)
mi = MetaInformation(title, self.get_authors(x))
try:
if DOUBAN_API_KEY is not None:
id_url = id_url + "?apikey=" + DOUBAN_API_KEY
if api_key != '':
id_url = id_url + "?apikey=" + api_key
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
@@ -203,12 +204,16 @@ class ResultList(list):
self.append(mi)
def search(title=None, author=None, publisher=None, isbn=None,
verbose=False, max_results=40):
verbose=False, max_results=40, api_key=None):
br = browser()
start, entries = 1, []
if api_key is None:
api_key = CALIBRE_DOUBAN_API_KEY
while start > 0 and len(entries) <= max_results:
new, start = Query(title=title, author=author, publisher=publisher,
isbn=isbn, max_results=max_results, start_index=start)(br, verbose)
isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose)
if not new:
break
entries.extend(new)
@@ -216,7 +221,7 @@ def search(title=None, author=None, publisher=None, isbn=None,
entries = entries[:max_results]
ans = ResultList()
ans.populate(entries, br, verbose)
ans.populate(entries, br, verbose, api_key)
return ans
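
A usage sketch for the reworked entry point (the ISBN and key are illustrative): omitting api_key falls back to the built-in CALIBRE_DOUBAN_API_KEY, while a private key overrides it.

# results = search(isbn='9787536692930')                       # built-in key
# results = search(title='Santi', api_key='your-private-key')  # your own key
# for mi in results:
#     print mi.title, mi.authors
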
def option_parser():

View File

@@ -10,10 +10,11 @@ from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.customize import Plugin
from calibre.ebooks.metadata.library_thing import check_for_cover
metadata_config = None
class MetadataSource(Plugin):
class MetadataSource(Plugin): # {{{
author = 'Kovid Goyal'
@@ -130,7 +131,9 @@ class MetadataSource(Plugin):
def customization_help(self):
return 'This plugin can only be customized using the GUI'
class GoogleBooks(MetadataSource):
# }}}
class GoogleBooks(MetadataSource): # {{{
name = 'Google Books'
description = _('Downloads metadata from Google Books')
@@ -145,8 +148,9 @@ class GoogleBooks(MetadataSource):
self.exception = e
self.tb = traceback.format_exc()
# }}}
class ISBNDB(MetadataSource):
class ISBNDB(MetadataSource): # {{{
name = 'IsbnDB'
description = _('Downloads metadata from isbndb.com')
@@ -181,7 +185,9 @@ class ISBNDB(MetadataSource):
'and enter your access key below.')
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
class Amazon(MetadataSource):
# }}}
class Amazon(MetadataSource): # {{{
name = 'Amazon'
metadata_type = 'social'
@@ -198,37 +204,27 @@ class Amazon(MetadataSource):
self.exception = e
self.tb = traceback.format_exc()
class LibraryThing(MetadataSource):
# }}}
class LibraryThing(MetadataSource): # {{{
name = 'LibraryThing'
metadata_type = 'social'
description = _('Downloads series information from librarything.com')
description = _('Downloads series/tags/rating information from librarything.com')
def fetch(self):
if not self.isbn:
return
from calibre import browser
from calibre.ebooks.metadata import MetaInformation
import json
br = browser()
from calibre.ebooks.metadata.library_thing import get_social_metadata
try:
raw = br.open(
'http://status.calibre-ebook.com/library_thing/metadata/'+self.isbn
).read()
data = json.loads(raw)
if not data:
return
if 'error' in data:
raise Exception(data['error'])
if 'series' in data and 'series_index' in data:
mi = MetaInformation(self.title, [])
mi.series = data['series']
mi.series_index = data['series_index']
self.results = mi
self.results = get_social_metadata(self.title, self.book_author,
self.publisher, self.isbn)
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
# }}}
def result_index(source, result):
if not result.isbn:
@@ -268,6 +264,26 @@ class MetadataSources(object):
for s in self.sources:
s.join()
def filter_metadata_results(item):
keywords = ["audio", "tape", "cassette", "abridged", "playaway"]
for keyword in keywords:
if item.publisher and keyword in item.publisher.lower():
return False
return True
def do_cover_check(item):
item.has_cover = False
if item.isbn:
try:
item.has_cover = check_for_cover(item.isbn)
except:
pass # Cover not found
def check_for_covers(items):
threads = [Thread(target=do_cover_check, args=(item,)) for item in items]
for t in threads: t.start()
for t in threads: t.join()
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
verbose=0):
assert not(title is None and author is None and publisher is None and \
@@ -285,10 +301,73 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
for fetcher in fetchers[1:]:
merge_results(results, fetcher.results)
results = sorted(results, cmp=lambda x, y : cmp(
(x.comments.strip() if x.comments else ''),
(y.comments.strip() if y.comments else '')
), reverse=True)
results = list(filter(filter_metadata_results, results))
check_for_covers(results)
words = ("the", "a", "an", "of", "and")
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
trailing_paren_pat = re.compile(r'\(.*\)$')
whitespace_pat = re.compile(r'\s+')
def sort_func(x, y):
def cleanup_title(s):
if s is None:
s = _('Unknown')
s = s.strip().lower()
s = prefix_pat.sub(' ', s)
s = trailing_paren_pat.sub('', s)
s = whitespace_pat.sub(' ', s)
return s.strip()
t = cleanup_title(title)
x_title = cleanup_title(x.title)
y_title = cleanup_title(y.title)
# prefer results whose cleaned title exactly matches the cleaned search title
tx = cmp(t, x_title)
ty = cmp(t, y_title)
result = 0 if abs(tx) == abs(ty) else abs(tx) - abs(ty)
# then prefer titles that have a cover image
if result == 0:
result = -cmp(x.has_cover, y.has_cover)
# then prefer titles with the longest comment, within 10%
if result == 0:
cx = len(x.comments.strip() if x.comments else '')
cy = len(y.comments.strip() if y.comments else '')
t = (cx + cy) / 20
result = cy - cx
if abs(result) < t:
result = 0
return result
results = sorted(results, cmp=sort_func)
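Note that cmp-style comparators like sort_func exist only in Python 2. The same tiered ranking (exact cleaned-title match first, then cover presence, then comment length with a roughly 10% dead band) can be expressed through functools.cmp_to_key. A toy sketch under that assumption; Result, QUERY and tiered_cmp are illustrative names, and the title cleanup step is omitted for brevity:

from functools import cmp_to_key
from collections import namedtuple

Result = namedtuple('Result', 'title has_cover comments')
QUERY = 'dune'

def cmp(a, b):  # Python 3 dropped the builtin cmp
    return (a > b) - (a < b)

def tiered_cmp(x, y):
    # Tier 1: an exact title match beats a non-match.
    r = abs(cmp(QUERY, x.title)) - abs(cmp(QUERY, y.title))
    # Tier 2: prefer results that have a cover image.
    if r == 0:
        r = -cmp(x.has_cover, y.has_cover)
    # Tier 3: prefer the longer comment, ignoring gaps under ~10%.
    if r == 0:
        cx, cy = len(x.comments or ''), len(y.comments or '')
        r = 0 if abs(cy - cx) < (cx + cy) / 20 else cy - cx
    return r

results = [Result('dune messiah', True, ''), Result('dune', False, 'Classic.')]
results = sorted(results, key=cmp_to_key(tiered_cmp))
print([r.title for r in results])  # 'dune' ranks first on the title tier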
# if for some reason there is no comment in the top selection, go looking for one
if len(results) > 1:
if not results[0].comments or len(results[0].comments) == 0:
for r in results[1:]:
try:
if title and title.lower() == r.title[:len(title)].lower() \
and r.comments and len(r.comments):
results[0].comments = r.comments
break
except:
pass
# Find a pubdate
pubdate = None
for r in results:
if r.pubdate is not None:
pubdate = r.pubdate
break
if pubdate is not None:
for r in results:
if r.pubdate is None:
r.pubdate = pubdate
return results, [(x.name, x.exception, x.tb) for x in fetchers]

View File

@@ -34,7 +34,8 @@ def fetch_metadata(url, max=100, timeout=5.):
errmsg = soup.find('errormessage').string
raise ISBNDBError('Error fetching metadata: '+errmsg)
total_results = int(book_list['total_results'])
np = '&page_number=%s&'%(page_number+1)
page_number += 1
np = '&page_number=%s&'%page_number
url = re.sub(r'\&page_number=\d+\&', np, url)
books.extend(book_list.findAll('bookdata'))
max -= 1
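This hunk cures an off-by-one: the old code interpolated page_number+1 into the URL but never incremented page_number itself, so after the first page every subsequent request asked for page 2 again. A minimal sketch of the corrected loop shape, where fetch_page is a hypothetical stand-in for the real network call:

import re

def fetch_page(url):
    # Hypothetical stand-in for the real fetch; just echoes the URL.
    return url

url = 'http://example.com/api?foo=1&page_number=1&bar=2'
page_number = 1
for _ in range(3):
    print(fetch_page(url))
    page_number += 1      # advance the counter first...
    np = '&page_number=%s&' % page_number
    url = re.sub(r'\&page_number=\d+\&', np, url)  # ...then rewrite the URL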

View File

@@ -6,13 +6,31 @@ Fetch cover from LibraryThing.com based on ISBN number.
import sys, socket, os, re
from calibre import browser as _browser
from lxml import html
import mechanize
from calibre import browser, prints
from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
browser = None
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
class HeadRequest(mechanize.Request):
def get_method(self):
return 'HEAD'
def check_for_cover(isbn, timeout=5.):
br = browser()
br.set_handle_redirect(False)
try:
br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout)
return True
except Exception, e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
return True
return False
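Two details make check_for_cover cheap: the request is a HEAD (mechanize issues a GET unless get_method says otherwise), and redirects are disabled, so the Open Library ?default=false endpoint answers with a 302 when a cover exists and an HTTP error when it does not. A hedged sketch of the same probe using only the Python 3 standard library; here urllib follows the redirect for us, so a successful open, rather than a 302, signals a cover (the function name is illustrative):

import urllib.error
import urllib.request

OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'

def has_openlibrary_cover(isbn, timeout=5.0):
    # HEAD keeps the probe cheap: only the status line travels, not the
    # image bytes. With default=false a missing cover is an HTTP error,
    # not a placeholder image.
    req = urllib.request.Request(OPENLIBRARY % isbn, method='HEAD')
    try:
        urllib.request.urlopen(req, timeout=timeout)
        return True     # 2xx after any redirect: the cover exists
    except urllib.error.URLError:
        return False    # 404 (no cover) or network trouble

print(has_openlibrary_cover('9780261102385'))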
class LibraryThingError(Exception):
pass
@@ -22,31 +40,28 @@ class ISBNNotFound(LibraryThingError):
class ServerBusy(LibraryThingError):
pass
def login(username, password, force=True):
global browser
if browser is not None and not force:
return
browser = _browser()
browser.open('http://www.librarything.com')
browser.select_form('signup')
browser['formusername'] = username
browser['formpassword'] = password
browser.submit()
def login(br, username, password, force=True):
br.open('http://www.librarything.com')
br.select_form('signup')
br['formusername'] = username
br['formpassword'] = password
br.submit()
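The refactor threads the browser through as an argument instead of mutating a module-level global, so each caller (or thread) owns its own session. A hypothetical call pattern, assuming the code runs inside calibre with the module paths shown in this diff:

# Hypothetical usage of the refactored login(); credentials are dummies.
from calibre import browser
from calibre.ebooks.metadata.library_thing import login

br = browser()                            # the caller owns the session now
login(br, 'user@example.com', 'secret')
# br carries the LibraryThing cookies for any follow-up requests.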
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
global browser
if browser is None:
browser = _browser()
src = None
br = browser()
try:
return browser.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
return br.open(OPENLIBRARY%isbn, timeout=timeout).read(), 'jpg'
except:
pass # Cover not found
if username and password:
login(username, password, force=False)
try:
src = browser.open('http://www.librarything.com/isbn/'+isbn,
login(br, username, password, force=False)
except:
pass
try:
src = br.open_novisit('http://www.librarything.com/isbn/'+isbn,
timeout=timeout).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
@@ -63,7 +78,7 @@ def cover_from_isbn(isbn, timeout=5., username=None, password=None):
if url is None:
raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_S[XY]\d+', '', url['src'])
cover_data = browser.open(url).read()
cover_data = br.open_novisit(url).read()
return cover_data, url.rpartition('.')[-1]
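A hypothetical end-to-end use of the refactored cover_from_isbn, mirroring what main() below does with the returned (data, extension) pair; it assumes a calibre environment and uses the module path shown in this diff:

from calibre.ebooks.metadata.library_thing import cover_from_isbn

cover_data, ext = cover_from_isbn('9780261102385')
with open('cover.' + (ext or 'jpg'), 'wb') as f:
    f.write(cover_data)   # ext comes from the resolved image URL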
def option_parser():
@@ -71,7 +86,7 @@ def option_parser():
_('''
%prog [options] ISBN
Fetch a cover image for the book identified by ISBN from LibraryThing.com
Fetch a cover image/social metadata for the book identified by ISBN from LibraryThing.com
'''))
parser.add_option('-u', '--username', default=None,
help='Username for LibraryThing.com')
@@ -79,6 +94,61 @@ Fetch a cover image for the book identified by ISBN from LibraryThing.com
help='Password for LibraryThing.com')
return parser
def get_social_metadata(title, authors, publisher, isbn, username=None,
password=None):
from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation(title, authors)
if isbn:
br = browser()
if username and password:
try:
login(br, username, password, force=False)
except:
pass
raw = br.open_novisit('http://www.librarything.com/isbn/'
+isbn).read()
if not raw:
return mi
root = html.fromstring(raw)
h1 = root.xpath('//div[@class="headsummary"]/h1')
if h1 and not mi.title:
mi.title = html.tostring(h1[0], method='text', encoding=unicode)
h2 = root.xpath('//div[@class="headsummary"]/h2/a')
if h2 and not mi.authors:
mi.authors = [html.tostring(x, method='text', encoding=unicode) for
x in h2]
h3 = root.xpath('//div[@class="headsummary"]/h3/a')
if h3:
match = None
for h in h3:
series = html.tostring(h, method='text', encoding=unicode)
match = re.search(r'(.+) \((.+)\)', series)
if match is not None:
break
if match is not None:
mi.series = match.group(1).strip()
match = re.search(r'[0-9.]+', match.group(2))
si = 1.0
if match is not None:
si = float(match.group())
mi.series_index = si
#tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
#if tags:
# mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
# in tags]
span = root.xpath(
'//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
if span:
raw = html.tostring(span[0], method='text', encoding=unicode)
match = re.search(r'([0-9.]+)', raw)
if match is not None:
rating = float(match.group())
if rating > 0 and rating <= 5:
mi.rating = rating
return mi
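The series heading scraped above has the shape 'Series Name (index)'; the code runs two regexes, one to split the name from the parenthesised part and one to pull a number out of it, defaulting the index to 1.0. A self-contained sketch of that parse (parse_series is an illustrative name):

import re

def parse_series(heading):
    # Stage 1: split 'Foundation (3)' into name and parenthesised index.
    match = re.search(r'(.+) \((.+)\)', heading)
    if match is None:
        return None, None
    name = match.group(1).strip()
    # Stage 2: pull the first number out of the parenthesised part;
    # default to 1.0 when it is something like '(omnibus)'.
    num = re.search(r'[0-9.]+', match.group(2))
    return name, float(num.group()) if num else 1.0

print(parse_series('Foundation (3)'))        # ('Foundation', 3.0)
print(parse_series('Discworld (omnibus)'))   # ('Discworld', 1.0)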
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
@@ -86,6 +156,8 @@ def main(args=sys.argv):
parser.print_help()
return 1
isbn = args[1]
mi = get_social_metadata('', [], '', isbn)
prints(mi)
cover_data, ext = cover_from_isbn(isbn, username=opts.username,
password=opts.password)
if not ext:

Some files were not shown because too many files have changed in this diff