Merge from trunk

2025-08-11 09:13:57 -04:00 · 2013-02-23 09:39:57 +01:00 · 2013-02-23 09:39:57 +01:00 · 9197ea19b9
commit 9197ea19b9
parent 0da2a109cf b4603c4be1
213 changed files with 88001 additions and 57675 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,117 @@
 #   new recipes:
 #     - title: 

+- version: 0.9.20
+  date: 2013-02-22
+
+  new features:
+    - title: "Book polishing: Add an option to smarten punctuation in the book when polishing"
+
+    - title: "Book polishing: Add an option to delete all saved settings to the load saved settings button"
+
+    - title: "Book polishing: Remember the last used settings"
+
+    - title: "Book polishing: Add a checkbox to enable/disable the detailed polishing report"
+
+    - title: "Add a separate tweak in Preferences-Tweaks for saving backups of files when polishing. That way you can have calibre save backups while converting EPUB->EPUB and not while polishing, if you so desire."
+
+    - title: "Content server: Allow clicking on the book cover to download it. Useful on small screen devices where clicking the Get button may be difficult"
+
+    - title: "Driver for Energy Systems C4 Touch."
+      tickets: [1127477]
+ 
+  bug fixes:
+    - title: "E-book viewer: Fix a bug that could cause the back button in the viewer to skip a location"
+
+    - title: "When tweaking/polishing an azw3 file that does not have an identified content ToC, do not auto-generate one."
+      tickets: [1130729]
+
+    - title: "Book polishing: Use the actual cover image dimensions when creating the svg wrapper for the cover image."
+      tickets: [1127273]
+
+    - title: "Book polishing: Do not error out on epub files containing an iTunesMetadata.plist file."
+      tickets: [1127308]
+
+    - title: "Book polishing: Fix trying to polish more than 5 books at a time not working"
+
+    - title: "Content server: Add workaround for bug in latest release of Google Chrome that causes it to not work with book lists containing some utf-8 characters"
+      tickets: [1130478]
+
+    - title: "E-book viewer: When viewing EPUB files, do not parse html as xhtml even if it has svg tags embedded. This allows malformed XHTML files to still be viewed."
+
+    - title: "Bulk metadata edit Search & replace: Update the sample values when changing the type of identifier to search on"
+
+    - title: "Fix recipes with the / character in their names not useable from the command line"
+      tickets: [1127666]
+
+    - title: "News download: Fix regression that broke downloading of images in gif format"
+
+    - title: "EPUB/AZW3 Output: When splitting the output html on page breaks, handle page-break-after rules correctly, the pre split point html should contain the full element"
+
+    - title: "Fix stdout/stderr redirection temp files not being deleted when restarting calibre from within calibre on windows"
+
+    - title: "E-book viewer: When viewing epub files that have their cover marked as non-linear, show the cover at the start of the book instead of the end."
+      tickets: [1126030]
+
+    - title: "EPUB Input: Fix handling of cover references with fragments in the urls"
+
+  improved recipes:
+    - Fronda
+    - Various Polish news sources
+
+  new recipes:
+    - title: Pravda 
+      author: Darko Miletic
+
+    - title: PNN 
+      author: n.kucklaender
+
+    - title: Various Polish news sources 
+      author: fenuks
+
+- version: 0.9.19
+  date: 2013-02-15
+
+  new features:
+    - title: "New tool: \"Polish books\" that allows you to perform various automated cleanup actions on EPUB and AZW3 files without doing a full conversion."
+      type: major
+      description: "Polishing books is all about putting the shine of perfection on your ebook files. You can use it to subset embedded fonts, update the metadata in the book files from the metadata in the calibre library, manipulate the book jacket, etc. More features will be added in the future. To use this tool, go to Preferences->Toolbar and add the Polish books tool to the main toolbar. Then simply select the books you want to be polished and click the Polish books button. Polishing, unlike conversion, does not change the internal structure/markup of your book; it performs only the minimal set of actions needed to achieve its goals. Note that polish books is a completely new codebase, so there may well be bugs. Polishing a book backs up the original as ORIGINAL_EPUB or ORIGINAL_AZW3, unless you have turned off this feature in Preferences->Tweaks, in which case you should back up your files manually. You can also use this tool from the command line with ebook-polish.exe."
+
+    - title: "Driver for the Trekstor Pyrus Mini."
+      tickets: [1124120]
+
+    - title: "E-book viewer: Add an option to change the minimum font size."
+      tickets: [1122333]
+ 
+    - title: "PDF Output: Add support for converting documents with math typesetting, as described here: http://manual.calibre-ebook.com/typesetting_math.html"
+
+    - title: "Column coloring/icons: Add more conditions when using date based columns with reference to 'today'."
+
+  bug fixes:
+    - title: "Transforming to titlecase - handle typographic hyphens in all caps phrases"
+
+    - title: "Don't ignore file open events that occur before the GUI is initialized on OS X"
+      tickets: [1122713]
+
+    - title: "News download: Handle feeds that have entries with empty ids"
+
+    - title: "Fix a regression that broke using the template editor"
+
+    - title: "Do not block startup while scanning the computer for available network interfaces. Speeds up startup time on some windows computers with lots of spurious network interfaces."
+
+  improved recipes:
+    - New Yorker
+    - Kommersant
+    - Le Monde (Subscription version)
+    - NZ Herald
+
+  new recipes:
+    - title: Navegalo 
+      author: Douglas Delgado
+
+    - title: El Guardian and More Intelligent Life
+      author: Darko Miletic
+
 - version: 0.9.18
  date: 2013-02-08

--- a/manual/develop.rst
+++ b/manual/develop.rst
@ -39,27 +39,27 @@ All the |app| python code is in the ``calibre`` package. This package contains t

    * devices - All the device drivers. Just look through some of the built-in drivers to get an idea for how they work.

-      * For details, see: devices.interface which defines the interface supported by device drivers and devices.usbms which
+      * For details, see: devices.interface which defines the interface supported by device drivers and ``devices.usbms`` which
        defines a generic driver that connects to a USBMS device. All USBMS based drivers in |app| inherit from it.

    * ebooks  - All the ebook conversion/metadata code. A good starting point is ``calibre.ebooks.conversion.cli`` which is the
-      module powering the :command:`ebook-convert` command. The conversion process is controlled via conversion.plumber.
-      The format independent code is all in ebooks.oeb and the format dependent code is in ebooks.format_name.
+      module powering the :command:`ebook-convert` command. The conversion process is controlled via ``conversion.plumber``.
+      The format independent code is all in ``ebooks.oeb`` and the format dependent code is in ``ebooks.format_name``.

-        * Metadata reading, writing, and downloading is all in ebooks.metadata
+        * Metadata reading, writing, and downloading is all in ``ebooks.metadata``
        * Conversion happens in a pipeline, for the structure of the pipeline,
          see :ref:`conversion-introduction`. The pipeline consists of an input
          plugin, various transforms and an output plugin. The that code constructs
-          and drives the pipeline is in plumber.py. The pipeline works on a
+          and drives the pipeline is in :file:`plumber.py`. The pipeline works on a
          representation of an ebook that is like an unzipped epub, with
          manifest, spine, toc, guide, html content, etc. The
-          class that manages this representation is OEBBook in oeb/base.py. The
+          class that manages this representation is OEBBook in ``ebooks.oeb.base``. The
          various transformations that are applied to the book during
-          conversions live in `oeb/transforms/*.py`. And the input and output
-          plugins live in `conversion/plugins/*.py`.
+          conversions live in :file:`oeb/transforms/*.py`. And the input and output
+          plugins live in :file:`conversion/plugins/*.py`.

-    * library - The database back-end and the content server. See library.database2 for the interface to the |app| library. library.server is the |app| Content Server.
-    * gui2 - The Graphical User Interface. GUI initialization happens in gui2.main and gui2.ui. The ebook-viewer is in gui2.viewer.
+    * library - The database back-end and the content server. See ``library.database2`` for the interface to the |app| library. ``library.server`` is the |app| Content Server.
+    * gui2 - The Graphical User Interface. GUI initialization happens in ``gui2.main`` and ``gui2.ui``. The ebook-viewer is in ``gui2.viewer``.

 If you need help understanding the code, post in the `development forum <http://www.mobileread.com/forums/forumdisplay.php?f=240>`_
 and you will most likely get help from one of |app|'s many developers.
--- a/manual/faq.rst
+++ b/manual/faq.rst
@ -250,42 +250,71 @@ If you don't want to uninstall it altogether, there are a couple of tricks you c
 simplest is to simply re-name the executable file that launches the library program. More detail
 `in the forums <http://www.mobileread.com/forums/showthread.php?t=65809>`_.

-How do I use |app| with my iPad/iPhone/iTouch?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+How do I use |app| with my iPad/iPhone/iPod touch?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 Over the air
 ^^^^^^^^^^^^^^

-The easiest way to browse your |app| collection on your Apple device (iPad/iPhone/iPod) is by using the calibre content server, which makes your collection available over the net. First perform the following steps in |app|
+The easiest way to browse your |app| collection on your Apple device
+(iPad/iPhone/iPod) is by using the |app| content server, which makes your
+collection available over the net. First perform the following steps in |app|

-  * Set the Preferred Output Format in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
-  * Set the output profile to iPad (this will work for iPhone/iPods as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
-  * Convert the books you want to read on your iPhone to EPUB format by selecting them and clicking the Convert button.
-  * Turn on the Content Server in |app|'s preferences and leave |app| running.
+  * Set the Preferred Output Format in |app| to EPUB (The output format can be
+    set under :guilabel:`Preferences->Interface->Behavior`)
+  * Set the output profile to iPad (this will work for iPhone/iPods as well),
+    under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
+  * Convert the books you want to read on your iDevice to EPUB format by
+    selecting them and clicking the Convert button.
+  * Turn on the Content Server by clicking the :guilabel:`Connect/Share` button
+    and leave |app| running. You can also tell |app| to automatically start the
+    content server via :guilabel:`Preferences->Sharing over the net`.

-Now on your iPad/iPhone you have two choices, use either iBooks (version 1.2 and later) or Stanza (version 3.0 and later). Both are available free from the app store.
+There are many apps for your iDevice that can connect to |app|. Here we
+describe using two of them, iBooks and Stanza.

 Using Stanza
 ***************

-Now you should be able to access your books on your iPhone by opening Stanza. Go to "Get Books" and then click the "Shared" tab. Under Shared you will see an entry "Books in calibre". If you don't, make sure your iPad/iPhone is connected using the WiFi network in your house, not 3G. If the |app| catalog is still not detected in Stanza, you can add it manually in Stanza. To do this, click the "Shared" tab, then click the "Edit" button and then click "Add book source" to add a new book source. In the Add Book Source screen enter whatever name you like and in the URL field, enter the following::
+You should be able to access your books on your iPhone by opening Stanza. Go to
+"Get Books" and then click the "Shared" tab. Under Shared you will see an entry
+"Books in calibre". If you don't, make sure your iPad/iPhone is connected using
+the WiFi network in your house, not 3G. If the |app| catalog is still not
+detected in Stanza, you can add it manually in Stanza. To do this, click the
+"Shared" tab, then click the "Edit" button and then click "Add book source" to
+add a new book source. In the Add Book Source screen enter whatever name you
+like and in the URL field, enter the following::

    http://192.168.1.2:8080/

-Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.   Now click "Save" and you are done.
+Replace ``192.168.1.2`` with the local IP address of the computer running
+|app|. If you have changed the port the |app| content server is running on, you
+will have to change ``8080`` as well to the new port. The local IP address is
+the IP address your computer is assigned on your home network. A quick Google
+search will tell you how to find out your local IP address.   Now click "Save"
+and you are done.

-If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout.
+If you get timeout errors while browsing the calibre catalog in Stanza, try
+increasing the connection timeout value in the stanza settings. Go to
+Info->Settings and increase the value of Download Timeout.

 Using iBooks
 **************

-Start the Safari browser and type in the IP address and port of the computer running the calibre server, like this::
+Start the Safari browser and type in the IP address and port of the computer
+running the calibre server, like this::

    http://192.168.1.2:8080/

-Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
+Replace ``192.168.1.2`` with the local IP address of the computer running
+|app|. If you have changed the port the |app| content server is running on, you
+will have to change ``8080`` as well to the new port. The local IP address is
+the IP address your computer is assigned on your home network. A quick Google
+search will tell you how to find out your local IP address.

-You will see a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
+You will see a list of books in Safari, just click on the epub link for
+whichever book you want to read, Safari will then prompt you to open it with
+iBooks.


 With the USB cable + iTunes
@ -663,7 +692,7 @@ Post any output you see in a help message on the `Forum <http://www.mobileread.c
 |app| freezes/crashes occasionally?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-There are five possible things I know of, that can cause this:
+There are several possible things I know of, that can cause this:

    * You recently connected an external monitor or TV to your computer. In
      this case, whenever |app| opens a new window like the edit metadata
@ -671,10 +700,6 @@ There are five possible things I know of, that can cause this:
      you dont notice it and so you think |app| has frozen. Disconnect your
      second monitor and restart calibre.

-    * You are using a Wacom branded USB mouse. There is an incompatibility between
-      Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom
-      mouse.
-
    * If you use RoboForm, it is known to cause |app| to crash. Add |app| to
      the blacklist of programs inside RoboForm to fix this. Or uninstall
      RoboForm.
@ -685,6 +710,17 @@ There are five possible things I know of, that can cause this:
    * Constant Guard Protection by Xfinity causes crashes in |app|. You have to
      manually allow |app| in it or uninstall Constant Guard Protection.

+    * Spybot - Search & Destroy blocks |app| from accessing its temporary files
+      breaking viewing and converting of books.
+
+    * You are using a Wacom branded USB mouse. There is an incompatibility between
+      Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom
+      mouse.
+
+    * On some 64 bit versions of Windows there are security software/settings
+      that prevent 64-bit |app| from working properly. If you are using the 64-bit
+      version of |app| try switching to the 32-bit version.
+
 If none of the above apply to you, then there is some other program on your
 computer that is interfering with |app|. First reboot your computer in safe
 mode, to have as few running programs as possible, and see if the crashes still
--- a/manual/gui.rst
+++ b/manual/gui.rst
@ -537,6 +537,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
      - Merge selected records, keeping originals
    * - :kbd:`O`
      - Open containing folder
+    * - :kbd:`P`
+      - Polish books
    * - :kbd:`S`
      - Save to Disk
    * - :kbd:`V`
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -11,7 +11,7 @@ class Adventure_zone(BasicNewsRecipe):
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
    index='http://www.adventure-zone.info/fusion/'
-    use_embedded_content=False
+    use_embedded_content = False
    preprocess_regexps     = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
    (re.compile(r'</?table.*?>'), lambda match: ''),
    (re.compile(r'</?tbody.*?>'), lambda match: '')]
@ -21,7 +21,7 @@ class Adventure_zone(BasicNewsRecipe):
    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; }'
    feeds          = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]

-    def parse_feeds (self): 
+    '''def parse_feeds (self): 
      feeds = BasicNewsRecipe.parse_feeds(self) 
      soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
      tag=soup.find(name='channel')
@ -34,7 +34,7 @@ class Adventure_zone(BasicNewsRecipe):
      for feed in feeds:
        for article in feed.articles[:]:
            article.title=titles[feed.articles.index(article)]
-      return feeds
+      return feeds'''


    '''def get_cover_url(self):
@ -42,16 +42,25 @@ class Adventure_zone(BasicNewsRecipe):
        cover=soup.find(id='box_OstatninumerAZ')
        self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
        return getattr(self, 'cover_url', self.cover_url)'''
-
+    def populate_article_metadata(self, article, soup, first):
+        # Prefer the page <title> with the " - Adventure Zone" suffix removed;
+        # otherwise fall back to the first <strong> element in the body.
+        match = re.search('(.+) - Adventure Zone', soup.title.string)
+        if match:
+            article.title = match.group(1)
+            return
+        strong = soup.body.find('strong')
+        if strong:
+            article.title = strong.string

    def skip_ad_pages(self, soup):
        skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
        skip_tag = skip_tag.findAll(name='a')
-        for r in skip_tag:
-           if r.strong:
-                 word=r.strong.string.lower()
-                 if word and (('zapowied' in word) or ('recenzj' in word)  or ('solucj' in word) or ('poradnik' in word)):
-                   return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
+        title = soup.title.string.lower()
+        if (('zapowied' in title) or ('recenzj' in title)  or ('solucj' in title) or ('poradnik' in title)):
+            for r in skip_tag:
+                if r.strong and r.strong.string:
+                   word=r.strong.string.lower()
+                   if (('zapowied' in word) or ('recenzj' in word)  or ('solucj' in word) or ('poradnik' in word)):
+                       return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)

    def preprocess_html(self, soup):
        footer=soup.find(attrs={'class':'news-footer middle-border'})
--- a/recipes/badania_net.recipe
+++ b/recipes/badania_net.recipe
@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class BadaniaNet(BasicNewsRecipe):
+    # Recipe for badania.net, assembled from the site's per-category feeds.
+    title          = u'badania.net'
+    __author__ = 'fenuks'
+    description   = u'chcesz wiedzieć więcej?'
+    category       = 'science'
+    language       = 'pl'
+    cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_empty_feeds = True
+    use_embedded_content = False
+    # Element selectors (by CSS class / id) used to trim the article page.
+    remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
+    remove_tags_after = dict(attrs={'class':'omc-single-tags'})
+    keep_only_tags = [dict(id='omc-full-article')]
+    # Each entry is (section title, feed URL). All URLs use the category's
+    # /feed/ endpoint; 'Zdrowie' previously pointed at the bare category page,
+    # unlike every other entry.
+    feeds          = [
+        (u'Psychologia', u'http://badania.net/category/psychologia/feed/'),
+        (u'Technologie', u'http://badania.net/category/technologie/feed/'),
+        (u'Biologia', u'http://badania.net/category/biologia/feed/'),
+        (u'Chemia', u'http://badania.net/category/chemia/feed/'),
+        (u'Zdrowie', u'http://badania.net/category/zdrowie/feed/'),
+        (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/'),
+    ]
--- a/recipes/bash_org_pl.recipe
+++ b/recipes/bash_org_pl.recipe
@ -35,8 +35,8 @@ class Bash_org_pl(BasicNewsRecipe):
             soup=self.index_to_soup(u'http://bash.org.pl/random/')
             #date=soup.find('div', attrs={'class':'right'}).string
             url=soup.find('a', attrs={'class':'qid click'})
-             title=url.string
-             url='http://bash.org.pl' +url['href']
+             title=''
+             url='http://bash.org.pl/random/'
             articles.append({'title' : title,
 	               'url'   : url,
 	               'date'  : '',
@ -44,6 +44,8 @@ class Bash_org_pl(BasicNewsRecipe):
 	                })
         return articles

+    def populate_article_metadata(self, article, soup, first):
+        """Set the article title from the element with class 'qid click'.
+
+        The title is left unchanged when that element is missing or has no
+        single string child, instead of raising AttributeError on None or
+        overwriting the title with None.
+        """
+        tag = soup.find(attrs={'class':'qid click'})
+        if tag is not None and tag.string:
+            article.title = tag.string

    def parse_index(self):
         feeds          = []
--- a/recipes/discover_magazine.recipe
+++ b/recipes/discover_magazine.recipe
@ -33,6 +33,21 @@ class DiscoverMagazine(BasicNewsRecipe):

    remove_tags_after = [dict(name='div', attrs={'class':'listingBar'})]

+    # Login stuff
+    needs_subscription = True
+    use_javascript_to_login = True
+    requires_version = (0, 9, 20)
+
+    def javascript_login(self, br, username, password):
+        # Log in through the site's front-page form using the supplied browser
+        # object `br` and the given credentials.
+        # NOTE(review): `br` is presumably calibre's javascript-capable browser
+        # (the recipe sets use_javascript_to_login) - confirm against the caller.
+        br.visit('http://discovermagazine.com', timeout=120)
+        # The login form is located by CSS selector rather than by form name.
+        f = br.select_form('div.login.section div.form')
+        f['username'] = username
+        f['password'] = password
+        br.submit('input[id="signInButton"]', timeout=120)
+        # Presumably keeps the page running for 20 (seconds?) so the login can
+        # complete - TODO confirm the unit and purpose of run_for_a_time.
+        br.run_for_a_time(20)
+    # End login stuff
+
+
    def append_page(self, soup, appendtag, position):
        pager = soup.find('span',attrs={'class':'next'})
        if pager:
--- a/recipes/ekologia_pl.recipe
+++ b/recipes/ekologia_pl.recipe
@ -15,7 +15,8 @@ class EkologiaPl(BasicNewsRecipe):
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
-    remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj']})]
+    remove_attrs = ['style']
+    remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj', 'widget-social-buttons']})]

    feeds          = [(u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'), (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'), (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')]

--- a/recipes/el_malpensante.recipe
+++ b/recipes/el_malpensante.recipe
@ -0,0 +1,27 @@
+# coding=utf-8
+# https://github.com/iemejia/calibrecolombia
+
+'''
+http://www.elmalpensante.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElMalpensante(BasicNewsRecipe):
+    # Recipe for El Malpensante (elmalpensante.com), built from its RSS feeds.
+    title = u'El Malpensante'
+    description = 'El Malpensante'
+    __author__ = 'Ismael Mejia <iemejia@gmail.com>'
+    language = 'es_CO'
+    cover_url = 'http://elmalpensante.com/img/layout/logo.gif'
+
+    oldest_article = 30
+    max_articles_per_feed = 100
+    simultaneous_downloads = 20
+    use_embedded_content = True
+    remove_empty_feeds = True
+    #tags = 'news, sport, blog'
+
+    # The 'Noticias' feed is almost identical to 'Artículos', so it is left
+    # disabled:
+    #   (u'Noticias', u'http://www.elmalpensante.com/noticiasRSS.php')
+    feeds = [
+        (u'Artículos', u'http://www.elmalpensante.com/articulosRSS.php'),
+        (u'Malpensantías', u'http://www.elmalpensante.com/malpensantiasRSS.php'),
+        (u'Margaritas', u'http://www.elmalpensante.com/margaritasRSS.php'),
+    ]
--- a/recipes/elguardian.recipe
+++ b/recipes/elguardian.recipe
@ -0,0 +1,93 @@
+__license__   = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+'''
+elguardian.com.ar
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElGuardian(BasicNewsRecipe):
+    # Recipe for the weekly El Guardian (elguardian.com.ar), built from the
+    # site's per-section RSS feeds.
+    title                 = 'El Guardian'
+    __author__            = 'Darko Miletic'
+    description           = "Semanario con todas las tendencias de un pais"
+    publisher             = 'Editorial Apache SA'
+    category              = 'news,politics,Argentina'
+    oldest_article        = 8
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'es_AR'
+    remove_empty_feeds    = True
+    publication_type      = 'magazine'
+    issn                  = '1666-7476'
+    masthead_url          = 'http://elguardian.com.ar/application/templates/frontend/images/home/logo.png'
+    extra_css             = """
+                               body{font-family: Arial,sans-serif}
+                               img{margin-bottom: 0.4em; display:block}
+                            """
+
+    # Built from the attributes above, so it must stay below them.
+    # get_cover_url() adds 'series_index' when the issue number can be parsed.
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        , 'series'    : title
+                        , 'isbn'      : issn
+                        }
+
+    keep_only_tags    = [dict(attrs={'class':['fotos', 'header_nota', 'nota']})]
+    remove_tags       = [dict(name=['meta','link','iframe','embed','object'])]
+    remove_attributes = ['lang']
+
+    feeds = [
+              (u'El Pais'       , u'http://elguardian.com.ar/RSS/el-pais.xml'       )
+             ,(u'Columnistas'   , u'http://elguardian.com.ar/RSS/columnistas.xml'   )
+             ,(u'Personajes'    , u'http://elguardian.com.ar/RSS/personajes.xml'    )
+             ,(u'Tinta roja'    , u'http://elguardian.com.ar/RSS/tinta-roja.xml'    )
+             ,(u'Yo fui'        , u'http://elguardian.com.ar/RSS/yo-fui.xml'        )
+             ,(u'Ciencia'       , u'http://elguardian.com.ar/RSS/ciencia.xml'       )
+             ,(u'Cronicas'      , u'http://elguardian.com.ar/RSS/cronicas.xml'      )
+             ,(u'Culturas'      , u'http://elguardian.com.ar/RSS/culturas.xml'      )
+             ,(u'DxT'           , u'http://elguardian.com.ar/RSS/dxt.xml'           )
+             ,(u'Fierros'       , u'http://elguardian.com.ar/RSS/fierros.xml'       )
+             ,(u'Frente fashion', u'http://elguardian.com.ar/RSS/frente-fashion.xml')
+             ,(u'Pan y vino'    , u'http://elguardian.com.ar/RSS/pan-y-vino.xml'    )
+             ,(u'Turismo'       , u'http://elguardian.com.ar/RSS/turismo.xml'       )
+            ]
+
+    def get_cover_url(self):
+        """Return the src of the cover image on the front page, or None.
+
+        As a side effect, the second number found in the 'datosNumero' block
+        is stored as ``series_index`` in ``conversion_options`` when present.
+        """
+        soup = self.index_to_soup('http://elguardian.com.ar/')
+        udata = soup.find('div', attrs={'class':'datosNumero'})
+        if udata:
+            sdata = udata.find('div')
+            if sdata:
+                numbers = re.findall(r'\d+', self.tag_to_string(sdata))
+                # Only set the index when a second number exists, so an
+                # unexpected layout does not raise IndexError and prevent
+                # the cover URL below from being returned.
+                if len(numbers) > 1:
+                    self.conversion_options.update({'series_index':int(numbers[1])})
+        unumero = soup.find('div', attrs={'class':'ultimoNumero'})
+        if unumero:
+            img = unumero.find('img', src=True)
+            if img:
+                return img['src']
+        return None
+
+    def preprocess_html(self, soup):
+        """Drop inline styles, flatten links, and give every image an alt text.
+
+        Returns the same soup object, modified in place.
+        """
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                # Link with a single text child: keep only the text.
+                item.replaceWith(item.string)
+            elif limg:
+                # Link wrapping an image: keep the content, but turn the
+                # anchor into an attribute-less <div>.
+                item.name = 'div'
+                item.attrs = []
+            else:
+                # Link with nested markup and no image: keep its text only.
+                item.replaceWith(self.tag_to_string(item))
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
--- a/recipes/eso_pl.recipe
+++ b/recipes/eso_pl.recipe
@ -0,0 +1,23 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ESO(BasicNewsRecipe):
+    # Recipe for the Polish-language ESO pages (eso.org/public/poland).
+    title = u'ESO PL'
+    __author__ = 'fenuks'
+    description = u'ESO, Europejskie Obserwatorium Południowe, buduje i obsługuje najbardziej zaawansowane naziemne teleskopy astronomiczne na świecie'
+    category = 'astronomy'
+    language = 'pl'
+    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1922519424/eso-twitter-logo.png'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    remove_empty_feeds = True
+    no_stylesheets = True
+
+    keep_only_tags = [dict(attrs={'class':'subcl'})]
+    remove_tags = [
+        dict(id='lang_row'),
+        dict(attrs={'class':['pr_typeid', 'pr_news_feature_link', 'outreach_usage', 'hidden']}),
+    ]
+
+    feeds = [
+        (u'Wiadomo\u015bci', u'http://www.eso.org/public/poland/news/feed/'),
+        (u'Og\u0142oszenia', u'http://www.eso.org/public/poland/announcements/feed/'),
+        (u'Zdj\u0119cie tygodnia', u'http://www.eso.org/public/poland/images/potw/feed/'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Rewrite links whose href starts with '/' into absolute eso.org URLs;
+        # every other link is left untouched.
+        site_root = 'http://www.eso.org'
+        for link in soup.findAll('a', href=True):
+            target = link['href']
+            if not target.startswith('/'):
+                continue
+            link['href'] = site_root + target
+        return soup
--- a/recipes/fronda.recipe
+++ b/recipes/fronda.recipe
@ -23,7 +23,6 @@ class Fronda(BasicNewsRecipe):
    extra_css = '''
        h1 {font-size:150%}
        .body {text-align:left;}
-        div.headline {font-weight:bold}
    '''

    earliest_date = date.today() - timedelta(days=oldest_article)
@ -72,7 +71,7 @@ class Fronda(BasicNewsRecipe):
            feeds.append((genName, articles[genName]))
        return feeds

-    keep_only_tags = [ 
+    keep_only_tags = [
        dict(name='div', attrs={'class':'yui-g'})
        ]

@ -84,5 +83,7 @@ class Fronda(BasicNewsRecipe):
        dict(name='ul', attrs={'class':'comment-list'}),
        dict(name='ul', attrs={'class':'category'}),
        dict(name='p', attrs={'id':'comments-disclaimer'}),
+        dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
+        dict(name='div', attrs={'style':'text-align: left; margin-top: 15px;'}),
        dict(name='div', attrs={'id':'comment-form'})
        ]
--- a/recipes/hnonline.recipe
+++ b/recipes/hnonline.recipe
@ -0,0 +1,68 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class HNonlineRecipe(BasicNewsRecipe):
+    """Recipe for the HNonline, FinWeb and HNstyle feeds of hnonline.sk."""
+
+    __license__  = 'GPL v3'
+    __author__ = 'lacike'
+    version = 1
+
+    title = u'HNonline'
+    publisher = u'HNonline'
+    description = u'News from Slovakia'
+    category = u'News, Newspaper'
+    language = 'sk'
+    cover_url = u'http://hnonline.sk/img/sk/_relaunch/logo2.png'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    use_embedded_content = False
+
+    remove_javascript = True
+    no_stylesheets = True
+
+    # Feeds from: http://rss.hnonline.sk, for listing see http://rss.hnonline.sk/prehlad
+    # The part of each title before '|' names the sub-site the feed belongs to.
+    feeds = [
+        (u'HNonline|Ekonomika a firmy', u'http://rss.hnonline.sk/?p=kC1000'),
+        (u'HNonline|Slovensko', u'http://rss.hnonline.sk/?p=kC2000'),
+        (u'HNonline|Svet', u'http://rss.hnonline.sk/?p=kC3000'),
+        (u'HNonline|\u0160port', u'http://rss.hnonline.sk/?p=kC4000'),
+        (u'HNonline|Online rozhovor', u'http://rss.hnonline.sk/?p=kCR000'),
+
+        (u'FinWeb|Spr\u00E1vy zo sveta financi\u00ED', u'http://rss.finweb.hnonline.sk/spravodajstvo'),
+        (u'FinWeb|Koment\u00E1re a anal\u00FDzy', u'http://rss.finweb.hnonline.sk/?p=kPC200'),
+        (u'FinWeb|Invest\u00EDcie', u'http://rss.finweb.hnonline.sk/?p=kPC300'),
+        (u'FinWeb|Svet akci\u00ED', u'http://rss.finweb.hnonline.sk/?p=kPC400'),
+        (u'FinWeb|Rozhovory', u'http://rss.finweb.hnonline.sk/?p=kPC500'),
+        (u'FinWeb|T\u00E9ma t\u00FD\u017Ed\u0148a', u'http://rss.finweb.hnonline.sk/?p=kPC600'),
+        (u'FinWeb|Rebr\u00ED\u010Dky', u'http://rss.finweb.hnonline.sk/?p=kPC700'),
+
+        (u'HNstyle|Kult\u00FAra', u'http://style.hnonline.sk/?p=kTC100'),
+        (u'HNstyle|Auto-moto', u'http://style.hnonline.sk/?p=kTC200'),
+        (u'HNstyle|Digit\u00E1l', u'http://style.hnonline.sk/?p=kTC300'),
+        (u'HNstyle|Veda', u'http://style.hnonline.sk/?p=kTCV00'),
+        (u'HNstyle|Dizajn', u'http://style.hnonline.sk/?p=kTC400'),
+        (u'HNstyle|Cestovanie', u'http://style.hnonline.sk/?p=kTCc00'),
+        (u'HNstyle|V\u00EDkend', u'http://style.hnonline.sk/?p=kTC800'),
+        (u'HNstyle|Gastro', u'http://style.hnonline.sk/?p=kTC600'),
+        (u'HNstyle|M\u00F3da', u'http://style.hnonline.sk/?p=kTC700'),
+        (u'HNstyle|Modern\u00E1 \u017Eena', u'http://style.hnonline.sk/?p=kTCA00'),
+        (u'HNstyle|Pre\u010Do nie?!', u'http://style.hnonline.sk/?p=k7C000'),
+    ]
+
+    # Only the headline, sub-headline, lead and body elements are kept.
+    keep_only_tags = [
+        dict(name = 'h1', attrs = {'class': 'detail-titulek'}),
+        dict(name = 'div', attrs = {'class': 'detail-podtitulek'}),
+        dict(name = 'div', attrs = {'class': 'detail-perex'}),
+        dict(name = 'div', attrs = {'class': 'detail-text'}),
+    ]
+
+    # Both lists are intentionally empty; the disabled rules are kept for reference.
+    remove_tags = []
+    #remove_tags.append(dict(name = 'div', attrs = {'id': re.compile('smeplayer.*')}))
+
+    remove_tags_after = []
+    #remove_tags_after = [dict(name = 'p', attrs = {'class': 'autor_line'})]
+
+    extra_css = '''
+                @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+                @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}
+                body {font-family: sans1, serif1;}
+                '''
--- a/recipes/icons/badania_net.png
+++ b/recipes/icons/badania_net.png
--- a/recipes/icons/elguardian.png
+++ b/recipes/icons/elguardian.png
--- a/recipes/icons/eso_pl.png
+++ b/recipes/icons/eso_pl.png
--- a/recipes/icons/hnonline.png
+++ b/recipes/icons/hnonline.png
--- a/recipes/icons/kurier_galicyjski.png
+++ b/recipes/icons/kurier_galicyjski.png
--- a/recipes/icons/more_intelligent_life.png
+++ b/recipes/icons/more_intelligent_life.png
--- a/recipes/icons/nauka_w_polsce.png
+++ b/recipes/icons/nauka_w_polsce.png
--- a/recipes/icons/osworld_pl.png
+++ b/recipes/icons/osworld_pl.png
--- a/recipes/icons/pravda_rs.png
+++ b/recipes/icons/pravda_rs.png
--- a/recipes/icons/ubuntu_pomoc_org.png
+++ b/recipes/icons/ubuntu_pomoc_org.png
--- a/recipes/icons/wprost_rss.png
+++ b/recipes/icons/wprost_rss.png
--- a/recipes/informacje_usa.recipe
+++ b/recipes/informacje_usa.recipe
@ -1,5 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
 class Informacje_USA(BasicNewsRecipe):
    title          = u'Informacje USA'
    oldest_article = 7
@ -8,11 +7,10 @@ class Informacje_USA(BasicNewsRecipe):
    description   = u'portal wiadomości amerykańskich'
    category       = 'news'
    language       = 'pl'
-    masthead_url= 'http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
-    cover_url='http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
+    cover_url='http://www.informacjeusa.com/wp-content/uploads/2013/01/V3BANNER420-90new.jpg'
    no_stylesheets = True
-    preprocess_regexps = [(re.compile(ur'<p>Zobacz:.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><a href=".*?Zobacz także:.*?</a></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><p>Zobacz też:.*?</a></p>', re.DOTALL), lambda match: '')]
-    keep_only_tags=[dict(name='div', attrs={'class':'box box-single'})]
-    remove_tags_after= dict(attrs={'class':'tags'})
-    remove_tags= [dict(attrs={'class':['postmetadata', 'tags', 'banner']}), dict(name='a', attrs={'title':['Drukuj', u'Wyślij']})]
+    use_embedded_content = False
+    keep_only_tags=[dict(id='post-area')]
+    remove_tags_after= dict(id='content-area')
+    remove_tags= [dict(attrs={'class':['breadcrumb']}), dict(id=['social-box', 'social-box-vert'])]
    feeds          = [(u'Informacje', u'http://www.informacjeusa.com/feed/')]
--- a/recipes/kdefamily_pl.recipe
+++ b/recipes/kdefamily_pl.recipe
@ -0,0 +1,14 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class KDEFamilyPl(BasicNewsRecipe):
+    """Recipe for the kdefamily.pl feed; article text is taken from the feed itself."""
+
+    title = u'KDEFamily.pl'
+    __author__ = 'fenuks'
+    description = u'KDE w Polsce'
+    category = 'open source, KDE'
+    language = 'pl'
+    cover_url = 'http://www.mykde.home.pl/kdefamily/wp-content/uploads/2012/07/logotype-e1341585198616.jpg'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = True
+    no_stylesheets = True
+
+    feeds = [
+        (u'Wszystko', u'http://kdefamily.pl/feed/'),
+    ]
--- a/recipes/kommersant.recipe
+++ b/recipes/kommersant.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.kommersant.ru
 '''
@ -29,17 +29,20 @@ class Kommersant_ru(BasicNewsRecipe):
 							"""

    conversion_options = {
-                          'comment'          : description
-                        , 'tags'             : category
-                        , 'publisher'        : publisher
-                        , 'language'         : language
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
                        }

    keep_only_tags = [dict(attrs={'class':['document','document_vvodka','document_text','document_authors vblock']})]
    remove_tags    = [dict(name=['iframe','object','link','img','base','meta'])]

-    feeds       = [(u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')]
-
+    feeds       = [(u'Articles', u'http://dynamic.feedsportal.com/pf/438800/http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')]
+    
+    def get_article_url(self, article):
+        """Return the feed entry's ``guid`` value as the article URL.
+
+        Yields None when the entry carries no ``guid``.
+        """
+        # NOTE(review): presumably the guid is preferred because the feed is now
+        # fetched through feedsportal.com -- confirm against the live feed.
+        guid = article.get('guid')
+        return guid
+    
    def print_version(self, url):	    
        return url.replace('/doc-rss/','/Doc/') + '/Print'
 		
--- a/recipes/kurier_galicyjski.recipe
+++ b/recipes/kurier_galicyjski.recipe
@ -0,0 +1,56 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
+class KurierGalicyjski(BasicNewsRecipe):
+    """Recipe for kuriergalicyjski.com; multi-page articles are merged into one page."""
+
+    title          = u'Kurier Galicyjski'
+    __author__        = 'fenuks'
+    #description   = u''
+    category       = 'news'
+    language       = 'pl'
+    cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    no_stylesheets = True
+
+    # Exact inline style used to recognise one kind of element that is removed
+    # both by remove_tags and after pages have been merged in append_page().
+    _NAV_BAR_STYLE = 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'
+
+    keep_only_tags = [dict(attrs={'class':'item-page'})]
+    remove_tags = [dict(attrs={'class':'pagenav'}), dict(attrs={'style':_NAV_BAR_STYLE})]
+
+    # NOTE(review): 'Publicystyka' and 'Niezwyk\u0142e historie' point at the same
+    # URL (.../niezwykle-historie) -- looks like a copy/paste slip; confirm the
+    # intended 'Publicystyka' feed address.
+    feeds = [
+        (u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'),
+        (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
+        (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'),
+        (u'Rozmowy Kuriera', u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'),
+        (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'),
+        (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'),
+        (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'),
+        (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'),
+        (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'),
+        (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
+        (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom'),
+    ]
+
+    def append_page(self, soup, appendtag):
+        """Append the follow-up pages of a multi-page article to ``appendtag``.
+
+        The links inside the element with id ``article-index`` (all but the
+        first) are fetched one by one; the ``item-page`` element of each page
+        is stripped of its <h2> and ``article-info`` block and appended.
+        Afterwards the pagination widgets are removed from ``appendtag``.
+
+        Nothing happens when there is no article index, when it lists fewer
+        than two links, or when ``appendtag`` is None.
+        """
+        pager = soup.find(id='article-index')
+        if not pager or appendtag is None:
+            return
+        links = pager.findAll('a')[1:]
+        if not links:
+            return
+
+        for a in links:
+            href = a.get('href')
+            if not href:
+                # An anchor without a target cannot be followed.
+                continue
+            nexturl = 'http://www.kuriergalicyjski.com' + href
+            soup2 = self.index_to_soup(nexturl)
+            pagetext = soup2.find(attrs={'class':'item-page'})
+            if pagetext is None:
+                # Skip pages without an article body instead of aborting the
+                # whole article with an AttributeError.
+                self.log.warn('No article body found on %s, skipping page' % nexturl)
+                continue
+            if pagetext.h2:
+                pagetext.h2.extract()
+            info = pagetext.find(attrs={'class':'article-info'})
+            if info:
+                info.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+
+        # Remove everything related to pagination from the merged article.
+        leftovers = (
+            {'id':'article-index'},
+            {'class':'pagenavcounter'},
+            {'class':'pagination'},
+            {'class':'pagenav'},
+            {'style':self._NAV_BAR_STYLE},
+        )
+        for selector in leftovers:
+            for r in appendtag.findAll(attrs=selector):
+                r.extract()
+
+    def preprocess_html(self, soup):
+        """Merge follow-up pages, drop inline styles, space out captioned
+        images and make site-relative links absolute. Returns ``soup``."""
+        self.append_page(soup, soup.body)
+        for r in soup.findAll(style=True):
+            del r['style']
+        for img in soup.findAll(attrs={'class':'easy_img_caption smartresize'}):
+            # One line break before the last child, two after it.
+            img.insert(len(img.contents)-1, bs('<br />'))
+            img.insert(len(img.contents), bs('<br /><br />'))
+        for a in soup.findAll('a', href=True):
+            if a['href'].startswith('/'):
+                a['href'] = 'http://kuriergalicyjski.com' + a['href']
+        return soup
--- a/recipes/le_monde_sub.recipe
+++ b/recipes/le_monde_sub.recipe
@ -1,166 +1,94 @@
-#!/usr/bin/env  python
-
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+__author__    = 'Sylvain Durand <sylvain.durand@ponts.org>'
 __license__   = 'GPL v3'
-__copyright__ = '2012, 2013, Rémi Vanicat <vanicat at debian.org>'
-'''
-Lemonde.fr: Version abonnée
-'''

+import time

-import os, zipfile, re, time
-from urllib2 import HTTPError
-from calibre.constants import preferred_encoding
-
+from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ptempfile import PersistentTemporaryFile
+from urllib2 import HTTPError

-class LeMondeAbonne(BasicNewsRecipe):
+class LeMonde(BasicNewsRecipe):

-    title                 = u'Le Monde: Édition abonnés'
-    __author__            = u'Rémi Vanicat'
-    description           = u'Actualités'
-    category              = u'Actualités, France, Monde'
-    publisher             = 'Le Monde'
-    language              = 'fr'
-    needs_subscription    = True
-    no_stylesheets        = True
-    smarten_punctuation   = True
-    remove_attributes     = [ 'border', 'cellspacing', 'display', 'align', 'cellpadding', 'colspan', 'valign', 'vscape', 'hspace', 'alt', 'width', 'height']
-    extra_css = ''' li{margin:6pt 0}
-                    ul{margin:0}
+    title              = u'Le Monde: Édition abonnés'
+    __author__         = 'Sylvain Durand'
+    description        = u'Disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
+    language           = 'fr'
+    encoding           = 'utf8'

-                    div.photo img{max-width:100%; border:0px transparent solid;}
-                    div.photo{font-family:inherit; color:#333; text-align:center;}
-                    div.photo p{text-align:justify;font-size:.9em; line-height:.9em;}
+    needs_subscription = True

-                    @page{margin:10pt}
-                    .ar-txt {color:#000; text-align:justify;}
-                    h1{text-align:left; font-size:1.25em;}
+    date_url           = 'http://www.lemonde.fr/journalelectronique/donnees/libre/%Y%m%d/index.html'
+    login_url          = 'http://www.lemonde.fr/web/journal_electronique/identification/1,56-0,45-0,0.html'
+    journal_url        = 'http://www.lemonde.fr/journalelectronique/donnees/protege/%Y%m%d/%Y%m%d_ipad.xml'
+    masthead_url       = 'http://upload.wikimedia.org/wikipedia/fr/thumb/c/c5/Le_Monde_logo.svg/300px-Le_Monde_logo.svg.png'
+    couverture_url     = 'http://medias.lemonde.fr/abonnes/editionelectronique/%Y%m%d/html/data/img/%y%m%d01.jpg'

-                    .auteur{text-align:right; font-weight:bold}
-                    .feed{text-align:right; font-weight:bold}
-                    .po-ti2{font-weight:bold}
-                    .fen-tt{font-weight:bold;font-size:1.1em}
-    '''
+    extra_css = '''
+                img{max-width:100%}
+                h1{font-size:1.2em !important; line-height:1.2em !important; }
+                h2{font-size:1em !important; line-height:1em !important; }
+                h3{font-size:1em !important; text-transform:uppercase !important; color:#666;}
+                #photo{text-align:center !important; margin:10px 0 -8px;}
+                #lgd{font-size:1em !important; line-height:1em !important;  font-style:italic; color:#333;} '''

-    zipurl_format = 'http://medias.lemonde.fr/abonnes/editionelectronique/%Y%m%d/html/%y%m%d.zip'
-    coverurl_format = '/img/%y%m%d01.jpg'
-    path_format = "%y%m%d"
-    login_url = 'http://www.lemonde.fr/web/journal_electronique/identification/1,56-0,45-0,0.html'
+    keep_only_tags = [dict(name=['h1','h2','h3','div','txt'])]

-    keep_only_tags = [dict(name=['h1']), dict(name='div', attrs={ 'class': 'photo' }), dict(name='div', attrs={ 'class': 'po-ti2' }), dict(name='div', attrs={ 'class': 'ar-txt' }), dict(name='div', attrs={ 'class': 'po_rtcol' }) ]
-
-
-    remove_tags = [ dict(name='div', attrs={ 'class': 'po-ti' }),dict(name='div', attrs={ 'class': 'po-copy' })]
-
-    article_id_pattern = re.compile("[0-9]+\\.html")
-    article_url_format = 'http://www.lemonde.fr/journalelectronique/donnees/protege/%Y%m%d/html/'
+    def __init__(self, options, log, progress_reporter):
+        """Initialise the recipe and pick the edition date to download.
+
+        Starting from tomorrow (UTC) and stepping back one day at a time, up
+        to seven dates are probed by opening ``date_url`` for that date; the
+        first date that does not raise HTTPError is kept in ``self.date``.
+        If every probe fails, ``self.date`` stays at the last date tried.
+        ``self.timefmt`` is then derived from ``self.date``.
+        """
+        BasicNewsRecipe.__init__(self, options, log, progress_reporter)
+        one_day = 24*60*60
+        # The base-class browser is used directly, so this class's own
+        # get_browser() override is not invoked here.
+        browser = BasicNewsRecipe.get_browser(self)
+        stamp = time.time() + one_day
+        for _attempt in range(7):
+            self.date = time.gmtime(stamp)
+            probe_url = time.strftime(self.date_url,self.date)
+            try:
+                browser.open(probe_url)
+            except HTTPError:
+                stamp -= one_day
+            else:
+                break
+        formatted = strftime(u" %A %d %B %Y", self.date)
+        # Strip the leading zero of single-digit day numbers.
+        self.timefmt = formatted.replace(u' 0', u' ')

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            br.open(self.login_url)
-            br.select_form(nr=0)
-            br['login']    = self.username
-            br['password'] = self.password
-            br.submit()
+        br.open(self.login_url)
+        br.select_form(nr=0)
+        br['login']    = self.username
+        br['password'] = self.password
+        br.submit()
        return br

-    decalage = 24 * 60 * 60     # today Monde has tomorow date
-
    def get_cover_url(self):
-        url = time.strftime(self.coverurl_format, self.ltime)
-        return self.articles_path + url
+        url = time.strftime(self.couverture_url,self.date)
+        return url

    def parse_index(self):
-        browser = self.get_browser()
-
-        second = time.time()
-        second += self.decalage
-
-        for i in range(7):
-            self.ltime = time.gmtime(second)
-            self.timefmt=time.strftime(" %A %d %B %Y",self.ltime).decode(preferred_encoding)
-            url = time.strftime(self.zipurl_format,self.ltime)
-            try:
-                response = browser.open(url)
-                continue
-            except HTTPError:
-                second -= 24*60*60
-
-        tmp = PersistentTemporaryFile(suffix='.zip')
-        self.report_progress(0.1,_('downloading zip file'))
-        tmp.write(response.read())
-        tmp.close()
-
-        zfile = zipfile.ZipFile(tmp.name, 'r')
-        self.report_progress(0.1,_('extracting zip file'))
-
-        zfile.extractall(self.output_dir)
-        zfile.close()
-
-        path = os.path.join(self.output_dir, time.strftime(self.path_format, self.ltime), "data")
-
-        self.articles_path = path
-
-        files = os.listdir(path)
-
-        nb_index_files = len([ name for name in files if re.match("frame_gauche_[0-9]+.html", name) ])
-
-        flux = []
-
-        article_url = time.strftime(self.article_url_format, self.ltime)
-
-        for i in range(nb_index_files):
-            filename = os.path.join(path, "selection_%d.html" % (i + 1))
-            tmp = open(filename,'r')
-            soup=BeautifulSoup(tmp,convertEntities=BeautifulSoup.HTML_ENTITIES)
-            title=soup.find('span').contents[0]
-            if title=="Une":
-                title="À la une"
-            if title=="Evenement":
-                title="L'événement"
-            if title=="Planete":
-                title="Planète"
-            if title=="Economie - Entreprises":
-                title="Économie"
-            if title=="L'Oeil du Monde":
-                title="L'œil du Monde"
-            if title=="Enquete":
-                title="Enquête"
-            if title=="Editorial - Analyses":
-                title="Analyses"
-            if title=="Le Monde Economie":
-                title="Économie"
-            if title=="Le Monde Culture et idées":
-                title="Idées"
-            if title=="Le Monde Géo et politique":
-                title="Géopolitique"
-            tmp.close()
-
-            filename = os.path.join(path, "frame_gauche_%d.html" % (i + 1))
-            tmp = open(filename,'r')
-            soup = BeautifulSoup(tmp)
+        url = time.strftime(self.journal_url,self.date)
+        soup = self.index_to_soup(url).sommaire
+        sections = []
+        for sec in soup.findAll("section"):
            articles = []
-            for link in soup.findAll("a"):
-                article_file = link['href']
-                article_id=self.article_id_pattern.search(article_file).group()
-                article = {
-                    'title': link.contents[0],
-                    'url': article_url + article_id,
-                    'description': '',
-                    'content': ''
-                    }
-                articles.append(article)
-            tmp.close()
+            if sec['cahier'] != "Le Monde":
+                for col in sec.findAll("fnts"):
+                    col.extract()
+            if sec['cahier']=="Le Monde Magazine":
+                continue
+            for art in sec.findAll("art"):
+                if art.txt.string and art.ttr.string:
+                    if art.find(['url']):
+                        art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
+                    if art.find(['lgd']) and art.find(['lgd']).string:
+                        art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
+                    article = "<html><head></head><body>"+unicode(art)+"</body></html>"
+                    article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
+                    article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
+                    f = PersistentTemporaryFile()
+                    f.write(article)
+                    articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
+            sections.append((sec['nom'], articles))
+        return sections

-            flux.append((title, articles))
+    def preprocess_html(self, soup):
+        """Remove the last child node of every element whose id is "lgd".
+
+        Returns the same soup object, modified in place.
+        """
+        # NOTE(review): presumably the trailing node of a caption is unwanted
+        # (e.g. a credit) -- confirm against the generated article HTML.
+        captions = soup.findAll(id="lgd")
+        for caption in captions:
+            trailing = caption.contents[-1]
+            trailing.extract()
+        return soup

-        return flux
-
-
-
-# Local Variables:
-# mode: python
-# End:
--- a/recipes/mlody_technik_pl.recipe
+++ b/recipes/mlody_technik_pl.recipe
@ -1,5 +1,5 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Mlody_technik(BasicNewsRecipe):
    title          = u'Młody technik'
@ -9,7 +9,19 @@ class Mlody_technik(BasicNewsRecipe):
    language       = 'pl'
    cover_url='http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
    no_stylesheets = True
+    preprocess_regexps = [(re.compile(r"<h4>Podobne</h4>", re.IGNORECASE), lambda m: '')]
    oldest_article = 7
    max_articles_per_feed = 100
-    #keep_only_tags=[dict(id='container')]
-    feeds          = [(u'Artyku\u0142y', u'http://www.mt.com.pl/feed')]
+    remove_empty_feeds = True
+    use_embedded_content = False
+    keep_only_tags = [dict(id='content')]
+    remove_tags = [dict(attrs={'class':'st-related-posts'})]
+    remove_tags_after = dict(attrs={'class':'entry-content clearfix'})
+    feeds          = [(u'Wszystko', u'http://www.mt.com.pl/feed'), 
+		(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'),
+		(u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'),
+		(u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'),
+		(u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'),
+		(u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'),
+		(u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'),
+		(u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')]
--- a/recipes/more_intelligent_life.recipe
+++ b/recipes/more_intelligent_life.recipe
@ -0,0 +1,67 @@
+__license__   = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+'''
+moreintelligentlife.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MoreIntelligentLife(BasicNewsRecipe):
+    """Recipe for moreintelligentlife.com, fetched through its FeedBurner feed."""
+
+    title                 = 'More Intelligent Life'
+    __author__            = 'Darko Miletic'
+    description           = "More Intelligent Life (moreintelligentlife.com) is the online version of Intelligent Life, a lifestyle and culture magazine from The Economist. The website offers not only content from the print edition, trickled out over the course of its shelf-life, but also the Editors' Blog, which carries daily posts from the editorial team-quickfire observations and opinions that allow readers to eavesdrop on the conversation in the office."
+    publisher             = 'The Economist Newspaper ltd'
+    category              = 'arts,lifestyle,intelligent life,the economist,ideas,style,culture'
+    oldest_article        = 60
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'website'
+    extra_css             = """
+                               body{font-family: Arial,"Helvetica neue","Bitstream Vera Sans",sans-serif}
+                               img{margin-bottom: 0.4em; display:block}
+                            """
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    keep_only_tags    = [dict(attrs={'class':'node'})]
+    remove_tags_after = dict(attrs={'class':'tags'})
+    remove_tags       = [dict(name=['meta','link','iframe','embed','object'])]
+    remove_attributes = ['lang']
+
+    feeds = [(u'Articles', u'http://feeds.feedburner.com/MoreintelligentlifeTotal')]
+
+    def get_cover_url(self):
+        """Return the ``src`` of the first image on the home page whose URL
+        starts with the current-issue cover prefix, or None if there is none."""
+        cover_prefix = 'http://moreintelligentlife.com/files/covers/current_issue_'
+        soup = self.index_to_soup('http://moreintelligentlife.com/')
+        for image in soup.findAll('img', src=True):
+            if image['src'].startswith(cover_prefix):
+                return image['src']
+        return None
+
+    def preprocess_html(self, soup):
+        """Strip inline styles, unwrap links and give every image an ``alt``.
+
+        Each <a> is handled as follows:
+          * a link with a single string child is replaced by that string;
+          * otherwise, a link containing an <img> is turned into an
+            attribute-less <div>;
+          * any other link is replaced by its text (via ``tag_to_string``).
+
+        Returns the same soup object, modified in place.
+        """
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            # Local names avoid shadowing the ``str`` builtin.
+            link_text = item.string
+            if link_text is not None:
+                item.replaceWith(link_text)
+            elif item.find('img'):
+                item.name = 'div'
+                item.attrs = []
+            else:
+                item.replaceWith(self.tag_to_string(item))
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
--- a/recipes/nauka_w_polsce.recipe
+++ b/recipes/nauka_w_polsce.recipe
@ -0,0 +1,47 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class NaukawPolsce(BasicNewsRecipe):
+    """Recipe for naukawpolsce.pl; the article lists are scraped from the
+    section pages rather than read from feeds."""
+
+    title          = u'Nauka w Polsce'
+    __author__        = 'fenuks'
+    description   = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak:  osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
+    category       = 'science'
+    language       = 'pl'
+    cover_url = 'http://www.naukawpolsce.pap.pl/Themes/Pap/images/logo-pl.gif'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_empty_feeds = True
+    # Prefix prepended to the article hrefs found on the section pages.
+    index = 'http://www.naukawpolsce.pl'
+    keep_only_tags = [dict(name='div', attrs={'class':'margines wiadomosc'})]
+    remove_tags = [dict(name='div', attrs={'class':'tagi'})]
+
+    def find_articles(self, url):
+        """Return the articles listed on the section page at ``url``.
+
+        Each article is a dict with ``title``, ``url``, ``date`` (always
+        empty) and ``description`` (always empty). List entries without an
+        <h1><a href=...> headline link are skipped.
+        """
+        articles = []
+        soup = self.index_to_soup(url)
+        for entry in soup.findAll(name='div', attrs={'class':'aktualnosci-margines lista-depesz information-content'}):
+            heading = entry.h1
+            link = heading.a if heading is not None else None
+            if link is None or not link.get('href'):
+                continue
+            # ``string`` is None when the anchor has nested markup; fall back
+            # to the flattened text in that case.
+            title = link.string or self.tag_to_string(link)
+            articles.append({'title' : title,
+                   'url'   : self.index + link['href'],
+                   'date'  : '',
+                   'description' : ''
+                    })
+        return articles
+
+    def parse_index(self):
+        """Return a list of (section title, articles) pairs, one per section page."""
+        sections = (
+            (u"Historia i kultura", 'http://www.naukawpolsce.pl/historia-i-kultura/'),
+            (u"Kosmos", 'http://www.naukawpolsce.pl/kosmos/'),
+            (u"Przyroda", 'http://www.naukawpolsce.pl/przyroda/'),
+            (u"Społeczeństwo", 'http://www.naukawpolsce.pl/spoleczenstwo/'),
+            (u"Technologie", 'http://www.naukawpolsce.pl/technologie/'),
+            (u"Uczelnie", 'http://www.naukawpolsce.pl/uczelnie/'),
+            (u"Nauki medyczne", 'http://www.naukawpolsce.pl/zdrowie/'),
+        )
+        feeds = []
+        for section_title, section_url in sections:
+            feeds.append((section_title, self.find_articles(section_url)))
+        return feeds
+
+    def preprocess_html(self, soup):
+        """Remove paragraphs whose only content is non-breaking spaces.
+
+        Returns the same soup object, modified in place.
+        """
+        # findAll(name='p', text=...) cannot be used for this: in
+        # BeautifulSoup 3 the ``text`` argument makes the search ignore
+        # ``name`` and return text nodes, so it would strip every text node
+        # containing '&nbsp;' and leave the empty <p> elements behind.
+        for p in soup.findAll('p'):
+            if p.find('img') is not None:
+                continue
+            text = u''.join(p.findAll(text=True))
+            if '&nbsp;' not in text and u'\xa0' not in text:
+                continue
+            if not text.replace('&nbsp;', u'').replace(u'\xa0', u'').strip():
+                p.extract()
+        return soup
--- a/recipes/navegalo.recipe
+++ b/recipes/navegalo.recipe
@ -0,0 +1,40 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1360354988(BasicNewsRecipe):
+    """Minimal Navegalo.com recipe that defines no feeds."""
+
+    # NOTE(review): looks like a leftover auto-generated stub; this file also
+    # defines a second recipe class with the same title -- confirm which one
+    # calibre actually loads and whether this one can be dropped.
+    title = u'Navegalo.com'
+    auto_cleanup = True
+    max_articles_per_feed = 100
+    oldest_article = 7
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class navegalonews(BasicNewsRecipe):
+    """Recipe for the Navegalo.com news feeds (Costa Rican Spanish)."""
+
+    __author__ = 'Douglas Delgado'
+    title = u'Navegalo.com'
+    publisher = 'Navegalo.com'
+    description = 'Noticias actualizadas por Navegalo.com. Recipe creado por Douglas Delgado (doudelgado@gmail.com) para su uso con Calibre'
+    category = 'Spanish, Entertainment'
+    language = 'es_CR'
+    encoding = 'utf-8'
+    masthead_url = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQZhML5lwsdss6FFF7CFR0Sf-Ln052Zmhs1TlIOcAL8JWN8a-dPlA'
+
+    # Download behaviour.
+    oldest_article = 2
+    max_articles_per_feed = 100
+    delay = 1
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    # Clean-up options.
+    auto_cleanup = True
+    remove_javascript = True
+    no_stylesheets = True
+
+    feeds = [
+        (u'Nacionales', u'http://www.navegalo.com/es/noticias/noticias/noticias-nacionales?format=feed&type=rss'),
+        (u'Internacionales', u'http://direct.navegalo.com/es/noticias/noticias/noticias-internacionales?format=feed&type=rss'),
+        (u'Deportes', u'http://direct.navegalo.com/es/noticias/noticias/deportes-nacionales?format=feed&type=rss'),
+        (u'Solo futbol', u'http://www.navegalo.com/es/noticias/noticias/solo-futbol?format=feed&type=rss'),
+        (u'Entretenimiento', u'http://www.navegalo.com/es/noticias/noticias/entretenimiento?format=feed&type=rss'),
+        (u'Solo para ellas', u'http://www.navegalo.com/es/noticias/noticias/solo-para-ellas?format=feed&type=rss'),
+        (u'Infiltrados', u'http://direct.navegalo.com/es/noticias/noticias/infiltrados?format=feed&type=rss'),
+        (u'Mano a mano', u'http://direct.navegalo.com/es/noticias/noticias/mano-a-mano?format=feed&type=rss'),
+    ]
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
+                '''
+
--- a/recipes/new_yorker.recipe
+++ b/recipes/new_yorker.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
 '''
 newyorker.com
 '''
@ -44,20 +44,18 @@ class NewYorker(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    keep_only_tags = [
-                        dict(name='div', attrs={'class':'headers'})
-                       ,dict(name='div', attrs={'id':['articleheads','items-container','articleRail','articletext','photocredits']})
-                     ]
+    keep_only_tags = [dict(name='div', attrs={'id':'pagebody'})]
    remove_tags    = [
                         dict(name=['meta','iframe','base','link','embed','object'])
-                        ,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] })
+                        ,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons','social-utils-top','entry-keywords','entry-categories','utilsPrintEmail'] })
                        ,dict(attrs={'id':['show-header','show-footer'] })
                     ]
+    remove_tags_after = dict(attrs={'class':'entry-content'}) 
    remove_attributes = ['lang']
    feeds             = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')]

    def print_version(self, url):
-        return url + '?printable=true'
+        return url + '?printable=true&currentPage=all'

    def image_url_processor(self, baseurl, url):
        return url.strip()
--- a/recipes/osworld_pl.recipe
+++ b/recipes/osworld_pl.recipe
@ -0,0 +1,33 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class OSWorld(BasicNewsRecipe):
+    """Recipe for OSWorld.pl.
+
+    Articles come from two RSS feeds. Multi-page articles are stitched
+    together in preprocess_html() by following the links of the page's
+    pagination block (id="paginacja").
+    """
+    title          = u'OSWorld.pl'
+    __author__        = 'fenuks'
+    description   = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!'
+    category       = 'OS, IT, open source, Linux'
+    language       = 'pl'
+    cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_empty_feeds = True
+    use_embedded_content = False
+    keep_only_tags = [dict(id=['dzial', 'posts'])]
+    remove_tags = [dict(attrs={'class':'post-comments'})]
+    remove_tags_after = dict(attrs={'class':'entry clr'})
+    feeds          = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'), (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')]
+
+    def append_page(self, soup, appendtag):
+        """Append the body of every follow-up page to *appendtag*.
+
+        soup      -- parsed first page (unused; kept for the existing signature)
+        appendtag -- tag that is searched for the pagination block and that
+                     receives the extra page content
+
+        Each link inside the element with id="paginacja" is downloaded and
+        its 'entry clr' node is appended. Links without an href and pages
+        without that node are skipped. All pagination blocks are removed
+        afterwards. Nothing happens when the page has no pagination block.
+        """
+        pager = appendtag.find(attrs={'id':'paginacja'})
+        if not pager:
+            return
+        for link in pager.findAll('a'):
+            nexturl = link.get('href')
+            if not nexturl:
+                continue
+            soup2 = self.index_to_soup(nexturl)
+            pagetext = soup2.find(attrs={'class':'entry clr'})
+            if pagetext is None:
+                # find() returns None when the node is missing; inserting
+                # None would raise, so skip this page instead.
+                continue
+            appendtag.insert(len(appendtag.contents), pagetext)
+        for r in appendtag.findAll(attrs={'id':'paginacja'}):
+            r.extract()
+
+    def preprocess_html(self, soup):
+        """Merge follow-up pages into the article body and return the soup."""
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/pc_lab.recipe
+++ b/recipes/pc_lab.recipe
@ -1,5 +1,4 @@
 #!/usr/bin/env  python
-
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class PCLab(BasicNewsRecipe):
@ -8,12 +7,13 @@ class PCLab(BasicNewsRecipe):
    __author__            = 'ravcio - rlelusz[at]gmail.com'
    description           = u"Articles from PC Lab website"
    language              = 'pl'
-    oldest_article        = 30.0
+    oldest_article        = 30
    max_articles_per_feed = 100
    recursions            = 0
    encoding              = 'iso-8859-2'
    no_stylesheets        = True
    remove_javascript     = True
+    remove_empty_feeds = True
    use_embedded_content  = False

    keep_only_tags = [
@ -21,50 +21,54 @@ class PCLab(BasicNewsRecipe):
                     ]

    remove_tags = [
-            dict(name='div', attrs={'class':['chapters']})
-            ,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
+            dict(name='div', attrs={'class':['toc first', 'toc', 'tags', 'recommendedarticles', 'name', 'zumi', 'chapters']})
                  ]

-    remove_tags_after = [
-            dict(name='div', attrs={'class':['navigation']})
-                ]
-
    #links to RSS feeds
-    feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]
+    feeds = [
+             (u'Aktualności', 'http://pclab.pl/xml/aktualnosci.xml'),
+             (u'Artykuły', u'http://pclab.pl/xml/artykuly.xml'),
+             (u'Poradniki', 'http://pclab.pl/xml/poradniki.xml')
+             ]

    #load second and subsequent page content
    # in: soup - full page with 'next' button
    # out: appendtag - tag to which new page is to be added
    def append_page(self, soup, appendtag):
        # find the 'Next' button
-        pager = soup.find('div', attrs={'class':'next'})
-
+        pager = soup.find('div', attrs={'class':'navigation'})
        if pager:
+            a = pager.find('a')
+            if 'news' in a['href']:
+                pager = None
+            else:
+                pager = pager.find('div', attrs={'class':'next'})
+
+        while pager:
            #search for 'a' element with link to next page (exit if not found)
            a = pager.find('a')
-            if a:
-                nexturl = a['href']
+            nexturl = a['href']
+            soup2 = self.index_to_soup('http://pclab.pl' + nexturl)
+            pager = soup2.find('div', attrs={'class':'next'})
+            pagetext = soup2.find('div', attrs={'class':'substance'})
+            pagetext = pagetext.find('div', attrs={'class':'data'})

-                soup2 = self.index_to_soup('http://pclab.pl/' + nexturl)
-
-                pagetext_substance = soup2.find('div', attrs={'class':'substance'})
-                pagetext = pagetext_substance.find('div', attrs={'class':'data'})
-                pagetext.extract()
-
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)
-                pos = len(appendtag.contents)
-
-                self.append_page(soup2, appendtag)
+            pos = len(appendtag.contents)
+            appendtag.insert(pos, pagetext)
+            pos = len(appendtag.contents)

+        pager = soup.find('div', attrs={'class':'navigation'})
+        if pager:
+            pager.extract()

    def preprocess_html(self, soup):
-
        # soup.body contains no title and no navigator, they are in soup
        self.append_page(soup, soup.body)
-
+        for link in soup.findAll('a'):
+            href = link.get('href', None)
+            if href and href.startswith('/'):
+                link['href'] = 'http://pclab.pl' + href
        # finally remove some tags
-        tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
-        [tag.extract() for tag in tags]
+        #for r in soup.findAll('div', attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})

        return soup
--- a/recipes/pnn.recipe
+++ b/recipes/pnn.recipe
@ -0,0 +1,55 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+'''Calibre recipe to convert the RSS feeds of the PNN to an ebook.'''
+
+class SportsIllustratedRecipe(BasicNewsRecipe):
+    """Recipe that turns the RSS feeds of the PNN (pnn.de) into an ebook.
+
+    NOTE(review): the class name looks like a leftover from another recipe;
+    it is kept unchanged so nothing that refers to it breaks.
+    """
+    title         = u'PNN'
+    description   = 'PNN RSS'
+    __author__    = 'n.kucklaender'
+    __copyright__ = 'a.peter'
+    __license__   = 'GPL v3'
+    version       = 1
+    language      = 'de'
+    publication_type = 'newspaper'
+    timefmt       = ' [%d.%m.%Y]'
+
+    oldest_article       = 7.0
+    use_embedded_content = False
+    remove_empty_feeds   = True
+    remove_javascript    = True
+    no_stylesheets       = True
+
+    # Page furniture dropped from every article page.
+    remove_tags = [
+        dict(attrs={'class':[
+            'um-weather um-header-weather',
+            'um-has-sub um-mainnav',
+            'um-box',
+            'ts-products',
+            'um-meta-nav',
+            'um-box um-last',
+            'um-footer',
+            'um-footer-links',
+            'share hidden',
+            'um-buttons',
+        ]}),
+        dict(id=['dinsContainer']),
+    ]
+    # Disabled alternatives, kept for reference:
+    # remove_tags_before = [dict(name='div', attrs={'class':'um-first'})]
+    # remove_tags_after = [dict(name='div', attrs={'class':'um-metabar'})]
+
+    feeds = [
+        (u'Titelseite', u'http://www.pnn.de/rss.xml'),
+        (u'Dritte Seite', u'http://www.pnn.de/dritte-seite/rss.xml'),
+        (u'Politik', u'http://www.pnn.de/politik/rss.xml'),
+        (u'Meinung', u'http://www.pnn.de/meinung/rss.xml'),
+        (u'Potsdam', u'http://www.pnn.de/potsdam/rss.xml'),
+        (u'Havel-Spree', u'http://www.pnn.de/havel-spree/rss.xml'),
+        (u'Potsdam-Mittelmark', u'http://www.pnn.de/pm/rss.xml'),
+        (u'Berlin-Brandenburg', u'http://www.pnn.de/brandenburg-berlin/rss.xml'),
+        (u'Wirtschaft', u'http://www.pnn.de/wirtschaft/rss.xml'),
+        (u'Sport', u'http://www.pnn.de/sport/rss.xml'),
+        (u'Regionalsport', u'http://www.pnn.de/regionalsport/rss.xml'),
+        (u'Kultur', u'http://www.pnn.de/kultur/rss.xml'),
+        (u'Potsdam-Kultur', u'http://www.pnn.de/potsdam-kultur/rss.xml'),
+        (u'Wissen', u'http://www.pnn.de/wissen/rss.xml'),
+        (u'Medien', u'http://www.pnn.de/medien/rss.xml'),
+        (u'Weltspiegel', u'http://www.pnn.de/weltspiegel/rss.xml'),
+        (u'Wissenschaft', u'http://www.pnn.de/campus/rss.xml'),
+        (u'Mobil', u'http://www.pnn.de/mobil/rss.xml'),
+        (u'Reise', u'http://www.pnn.de/reise/rss.xml'),
+        (u'Ratgeber', u'http://www.pnn.de/ratgeber/rss.xml'),
+        (u'Fragen des Tages', u'http://www.pnn.de/fragen-des-tages/rss.xml'),
+        # (u'Potsdam bin ich', u'http://www.pnn.de/potsdam-bin-ich/rss.xml'),
+        (u'Leserbriefe', u'http://www.pnn.de/leserbriefe/rss.xml'),
+    ]
+
+    def get_masthead_url(self):
+        """Return the URL of the PNN logo used as masthead image."""
+        masthead = 'http://www.pnn.de/app/base/img/pnn_logo.png'
+        return masthead
+
+    def print_version(self, url):
+        """Map an article URL to its print view by rewriting '.html'."""
+        printable = url.replace('.html', ',view,printVersion.html')
+        return printable
+
--- a/recipes/pravda_rs.recipe
+++ b/recipes/pravda_rs.recipe
@ -0,0 +1,85 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
+
+'''
+www.pravda.rs
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class Pravda_rs(BasicNewsRecipe):
+    """Recipe for the Serbian daily 'Pravda' (www.pravda.rs).
+
+    Feeds are the site's per-category RSS feeds; both the feeds and the
+    article pages are requested with the '?lng=lat' query string.
+    """
+    title                 = 'Dnevne novine Pravda'
+    __author__            = 'Darko Miletic'
+    description           = '24 sata portal vesti iz Srbije'
+    publisher             = 'Dnevne novine Pravda'
+    category              = 'news, politics, entertainment, Serbia'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    language              = 'sr'
+    publication_type      = 'newspaper'
+    remove_empty_feeds    = True
+    # URL building blocks shared by the feed list and print_version().
+    PREFIX                = 'http://www.pravda.rs'
+    FEEDPR                = PREFIX + '/category/'
+    LANGLAT               = '?lng=lat'
+    FEEDSU                = '/feed/' + LANGLAT
+    INDEX                 = PREFIX + LANGLAT
+    masthead_url          = 'http://www.pravda.rs/wp-content/uploads/2012/09/logoof.png'
+    extra_css             = """
+                               @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+                               body{font-family: Georgia,"Times New Roman",Times,serif1,serif;}
+                               img{display: block}
+                            """
+
+    conversion_options = {
+                          'comment'  : description
+                        , 'tags'     : category
+                        , 'publisher': publisher
+                        , 'language' : language
+                        }
+
+    # Replaces U+0110 with U+00D0 in the downloaded markup
+    # (presumably a glyph-availability workaround -- TODO confirm).
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    keep_only_tags    = [dict(name='div', attrs={'class':'post'})]
+    remove_tags       = [dict(name='h3')]
+    remove_tags_after = dict(name='h3')
+
+    feeds = [
+              (u'Politika' , FEEDPR + 'politika/' + FEEDSU),
+              (u'Tema Dana', FEEDPR + 'tema-dana/' + FEEDSU),
+              (u'Hronika'  , FEEDPR + 'hronika/' + FEEDSU),
+              (u'Društvo'  , FEEDPR + 'drustvo/' + FEEDSU),
+              (u'Ekonomija', FEEDPR + 'ekonomija/' + FEEDSU),
+              (u'Srbija'   , FEEDPR + 'srbija/' + FEEDSU),
+              (u'Beograd'  , FEEDPR + 'beograd/' + FEEDSU),
+              (u'Kultura'  , FEEDPR + 'kultura/' + FEEDSU),
+              (u'Zabava'   , FEEDPR + 'zabava/' + FEEDSU),
+              (u'Sport'    , FEEDPR + 'sport/' + FEEDSU),
+              (u'Svet'     , FEEDPR + 'svet/' + FEEDSU),
+              (u'Porodica' , FEEDPR + 'porodica/' + FEEDSU),
+              (u'Vremeplov', FEEDPR + 'vremeplov/' + FEEDSU),
+              (u'IT'       , FEEDPR + 'it/' + FEEDSU),
+              (u'Republika Srpska', FEEDPR + 'republika-srpska/' + FEEDSU),
+              (u'Crna Gora', FEEDPR + 'crna-gora/' + FEEDSU),
+              (u'EX YU'    , FEEDPR + 'eks-ju/' + FEEDSU),
+              (u'Dijaspora', FEEDPR + 'dijaspora/' + FEEDSU),
+              (u'Kolumna'  , FEEDPR + 'kolumna/' + FEEDSU),
+              (u'Afere'    , FEEDPR + 'afere/' + FEEDSU),
+              (u'Feljton'  , FEEDPR + 'feljton/' + FEEDSU),
+              (u'Intervju' , FEEDPR + 'intervju/' + FEEDSU),
+              (u'Reportaža', FEEDPR + 'reportaza/' + FEEDSU),
+              (u'Zanimljivosti', FEEDPR + 'zanimljivosti/' + FEEDSU),
+              (u'Sa trga'  , FEEDPR + 'sa-trga/' + FEEDSU)
+            ]
+
+    def print_version(self, url):
+        """Return the article URL with the '?lng=lat' query string appended."""
+        return url + self.LANGLAT
+
+    def preprocess_raw_html(self, raw, url):
+        """Replace everything before '</head>' with a minimal document head.
+
+        raw -- downloaded page markup
+        url -- address the page came from (unused)
+
+        Returns the markup from '</head>' onwards, prefixed with a stub
+        '<html><head><title>' block. When the page has no '</head>' the
+        markup is returned unchanged: str.find() yields -1 in that case and
+        slicing from -1 would keep only the last character of the page.
+        """
+        head_end = raw.find('</head>')
+        if head_end < 0:
+            return raw
+        return '<html><head><title>title</title>' + raw[head_end:]
+        
--- a/recipes/revista_cromos.recipe
+++ b/recipes/revista_cromos.recipe
@ -0,0 +1,33 @@
+# coding=utf-8
+# https://github.com/iemejia/calibrecolombia
+
+'''
+http://www.cromos.com.co/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElMalpensante(BasicNewsRecipe):
+    """Recipe for Revista Cromos (www.cromos.com.co).
+
+    Article text is taken straight from the RSS entries
+    (use_embedded_content is on), so no article pages are parsed.
+
+    NOTE(review): the class name looks like a leftover from another recipe;
+    it is kept unchanged so nothing that refers to it breaks.
+    """
+    title       = u'Revista Cromos'
+    description = 'Revista Cromos'
+    __author__  = 'Ismael Mejia <iemejia@gmail.com>'
+    language    = 'es_CO'
+    cover_url   = 'http://www.cromos.com.co/sites/cromos.com.co/themes/cromos_theme/images/logo_morado.gif'
+    #tags = 'news, sport, blog'
+
+    oldest_article         = 7
+    max_articles_per_feed  = 100
+    simultaneous_downloads = 20
+    use_embedded_content   = True
+    remove_empty_feeds     = True
+
+    feeds = [
+        (u'Cromos', u'http://www.cromos.com.co/rss.xml'),
+        (u'Moda', u'http://www.cromos.com.co/moda/feed'),
+        (u'Estilo de Vida', u'http://www.cromos.com.co/estilo-de-vida/feed'),
+        (u'Cuidado Personal', u'http://www.cromos.com.co/estilo-de-vida/cuidado-personal/feed'),
+        (u'Salud y Alimentación', u'http://www.cromos.com.co/estilo-de-vida/salud-y-alimentacion/feed'),
+        (u'Personajes', u'http://www.cromos.com.co/personajes/feed'),
+        (u'Actualidad', u'http://www.cromos.com.co/personajes/actualidad/feed'),
+        (u'Espectáculo', u'http://www.cromos.com.co/personajes/espectaculo/feed'),
+        (u'Reportajes', u'http://www.cromos.com.co/reportajes/feed'),
+        (u'Eventos', u'http://www.cromos.com.co/eventos/feed'),
+        (u'Modelos', u'http://www.cromos.com.co/modelos/feed'),
+    ]
--- a/recipes/spiders_web_pl.recipe
+++ b/recipes/spiders_web_pl.recipe
@ -5,11 +5,14 @@ class SpidersWeb(BasicNewsRecipe):
    oldest_article = 7
    __author__        = 'fenuks'
    description   = u''
-    cover_url      = 'http://www.spidersweb.pl/wp-content/themes/spiderweb/img/Logo.jpg'
+    cover_url      = 'http://www.spidersweb.pl/wp-content/themes/new_sw/images/spidersweb.png'
    category       = 'IT, WEB'
    language       = 'pl'
    no_stylesheers=True
+    remove_javascript = True
+    use_embedded_content = False
    max_articles_per_feed = 100
-    keep_only_tags=[dict(id='Post')]
-    remove_tags=[dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']}), dict(id='Author-Column')]
+    keep_only_tags=[dict(id='start')]
+    remove_tags_after = dict(attrs={'class':'padding20'})
+    remove_tags=[dict(name='div', attrs={'class':['padding border-bottom', 'padding20', 'padding border-top']})]
    feeds          = [(u'Wpisy', u'http://www.spidersweb.pl/feed')]
--- a/recipes/ubuntu_pomoc_org.recipe
+++ b/recipes/ubuntu_pomoc_org.recipe
@ -0,0 +1,22 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+class UbuntuPomoc(BasicNewsRecipe):
+    """Recipe for the Ubuntu-pomoc.org blog, built from its two FeedBurner feeds."""
+    title          = u'Ubuntu-pomoc.org'
+    __author__        = 'fenuks'
+    description   = u'Strona poświęcona systemowi Ubuntu Linux. Znajdziesz tutaj przydatne i sprawdzone poradniki oraz sposoby rozwiązywania wielu popularnych problemów. Ten blog rozwiąże każdy Twój problem - jeśli nie teraz, to wkrótce! :)'
+    category       = 'Linux, Ubuntu, open source'
+    language       = 'pl'
+    cover_url = 'http://www.ubuntu-pomoc.org/grafika/ubuntupomoc.png'
+    # Cut the page from the first 'ciekawostka' box to the end of the document.
+    preprocess_regexps = [(re.compile(r'<div class="ciekawostka">.+', re.IGNORECASE|re.DOTALL), lambda m: '')]
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+    remove_empty_feeds = True
+    use_embedded_content = False
+    # The recipe option is spelled 'remove_attributes'; the previous
+    # 'remove_attrs' is not read by calibre, so inline styles were kept.
+    remove_attributes = ['style']
+    keep_only_tags = [dict(attrs={'class':'post'})]
+    remove_tags_after = dict(attrs={'class':'underEntry'})
+    remove_tags = [dict(attrs={'class':['underPostTitle', 'yarpp-related', 'underEntry', 'social', 'tags', 'commentlist', 'youtube_sc']}), dict(id=['wp_rp_first', 'commentReply'])]
+    feeds          = [(u'Ca\u0142o\u015b\u0107', u'http://feeds.feedburner.com/Ubuntu-Pomoc'),
+                     (u'Gry', u'http://feeds.feedburner.com/GryUbuntu-pomoc')]
--- a/recipes/unperiodico.recipe
+++ b/recipes/unperiodico.recipe
@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# https://github.com/iemejia/calibrecolombia
+
+'''
+http://www.unperiodico.unal.edu.co/
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UNPeriodico(BasicNewsRecipe):
+    """Recipe for UN Periodico (www.unperiodico.unal.edu.co), fed by one RSS feed."""
+    title                 = u'UN Periodico'
+    description           = 'UN Periodico'
+    __author__            = 'Ismael Mejia <iemejia@gmail.com>'
+    language              = 'es_CO'
+    publication_type      = 'newspaper'
+    cover_url             = 'http://www.unperiodico.unal.edu.co/fileadmin/templates/periodico/img/logoperiodico.png'
+    oldest_article        = 30
+    max_articles_per_feed = 100
+    feeds = [
+        (u'UNPeriodico', u'http://www.unperiodico.unal.edu.co/rss/type/rss2/'),
+    ]
--- a/recipes/volksrant.recipe
+++ b/recipes/volksrant.recipe
@ -41,17 +41,9 @@ class AdvancedUserRecipe1249039563(BasicNewsRecipe):
   #######################################################################################################
    temp_files = []
    articles_are_obfuscated = True
-    use_javascript_to_login = True
-
-    def javascript_login(self, br, username, password):
-        'Volksrant wants the user to explicitly allow cookies'
-        if not br.visit('http://www.volkskrant.nl'):
-            raise Exception('Failed to connect to volksrant website')
-        br.click('#pop_cookie_text a[onclick]', wait_for_load=True, timeout=120)

    def get_obfuscated_article(self, url):
        br = self.browser.clone_browser()
-        print 'THE CURRENT URL IS: ', url
        br.open(url)
        year = date.today().year

--- a/recipes/wprost.recipe
+++ b/recipes/wprost.recipe
@ -10,89 +10,89 @@ from calibre.web.feeds.news import BasicNewsRecipe
 import re

 class Wprost(BasicNewsRecipe):
-        EDITION = 0
-        FIND_LAST_FULL_ISSUE = True
-        EXCLUDE_LOCKED = True
-        ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png'
+    EDITION = 0
+    FIND_LAST_FULL_ISSUE = True
+    EXCLUDE_LOCKED = True
+    ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png'
+    title = u'Wprost'
+    __author__ = 'matek09'
+    description = 'Weekly magazine'
+    encoding = 'ISO-8859-2'
+    no_stylesheets = True
+    language = 'pl'
+    remove_javascript = True
+    recursions = 0  
+    remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
+    remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
+    '''
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))
+    '''

-        title = u'Wprost'
-        __author__ = 'matek09'
-        description = 'Weekly magazine'
-        encoding = 'ISO-8859-2'
-        no_stylesheets = True
-        language = 'pl'
-        remove_javascript = True
-	recursions = 0	
-
-        remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
-        remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
-
-        '''keep_only_tags =[]
-        keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
-        keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
-        keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
-        keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''
-
-        preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
+    preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
        (re.compile(r'display: block;'), lambda match: ''),
        (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
        (re.compile(r'\<tr>'), lambda match: ''),
        (re.compile(r'\<td .*?\>'), lambda match: ''),
-	(re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
+        (re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]

-        remove_tags =[]
-        remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
-        remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
-        remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
+    remove_tags =[]
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
+    remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))


-        extra_css = '''
-                                        .div-header {font-size: x-small; font-weight: bold}
-                                        '''
-#h2 {font-size: x-large; font-weight: bold}
-        def is_blocked(self, a):
-                if a.findNextSibling('img') is None:
-                        return False
-                else:
-                        return True
+    extra_css = '''.div-header {font-size: x-small; font-weight: bold}'''
+    #h2 {font-size: x-large; font-weight: bold}
+    def is_blocked(self, a):
+        """Return True when an <img> follows link *a* among its next siblings.
+
+        Presumably that image is the lock icon shown next to locked
+        articles -- TODO confirm against the live markup.
+        """
+        return a.findNextSibling('img') is not None


+    def find_last_issue(self):
+        """Pick the issue to download from the archive page.
+
+        Sets self.EDITION and self.EDITION_SHORT from the chosen link's
+        href, and self.cover_url from the image inside that link (when
+        there is one).
+
+        With FIND_LAST_FULL_ISSUE set, the chosen link is the first
+        table-of-contents link after the last ICO_BLOCKED image on the
+        page. Otherwise, or when the page shows no such image, it is the
+        first table-of-contents link on the page.
+
+        Raises ValueError when no table-of-contents link can be found.
+        """
+        soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
+        toc_title = re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)
+        a = None
+        if self.FIND_LAST_FULL_ISSUE:
+            ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
+            # An empty result means no issue is marked as blocked, so
+            # indexing [-1] would fail; fall through to the first link.
+            if ico_blocked:
+                a = ico_blocked[-1].findNext('a', attrs={'title' : toc_title})
+        else:
+            a = soup.find('a', attrs={'title' : toc_title})
+        if a is None and self.FIND_LAST_FULL_ISSUE and not ico_blocked:
+            a = soup.find('a', attrs={'title' : toc_title})
+        if a is None:
+            raise ValueError('No issue link found on http://www.wprost.pl/archiwum/')
+        self.EDITION = a['href'].replace('/tygodnik/?I=', '')
+        self.EDITION_SHORT = a['href'].replace('/tygodnik/?I=15', '')
+        if a.img is not None:
+            self.cover_url = a.img['src']

-        def find_last_issue(self):
-                soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
-                a = 0
-                if self.FIND_LAST_FULL_ISSUE:
-                        ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
-                        a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
-                else:
-                        a = soup.find('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
-                self.EDITION = a['href'].replace('/tygodnik/?I=', '')
-		self.EDITION_SHORT = a['href'].replace('/tygodnik/?I=15', '')
-		self.cover_url = a.img['src']
+    def parse_index(self):
+        """Build the list of (section title, articles) for the chosen issue.
+
+        Section headers and article boxes are matched by position. Pairing
+        stops at the shorter of the two lists, so a page with more headers
+        than boxes no longer raises IndexError. Sections without any
+        article are left out.
+        """
+        self.find_last_issue()
+        soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
+        feeds = []
+        headers = soup.findAll(attrs={'class':'block-header block-header-left mtop20 mbottom20'})
+        articles_list = soup.findAll(attrs={'class':'standard-box'})
+        for header, box in zip(headers, articles_list):
+            articles = self.find_articles(box)
+            if not articles:
+                continue
+            link = header.find('a')
+            section = link.string if link is not None else None
+            if not section:
+                # Header without a plain-text link: use the header's own text.
+                section = self.tag_to_string(header)
+            feeds.append((section, articles))
+        return feeds

-        def parse_index(self):
-                self.find_last_issue()
-                soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
-                feeds = []
-                for main_block in soup.findAll(attrs={'id': 'content-main-column-element-content'}):
-                        articles = list(self.find_articles(main_block))
-                        if len(articles) > 0:
-                                section = self.tag_to_string(main_block.find('h3'))
-                                feeds.append((section, articles))
-                return feeds
-
-        def find_articles(self, main_block):
-                for a in main_block.findAll('a'):
-                        if a.name in "td":
-                                break
-                        if self.EXCLUDE_LOCKED & self.is_blocked(a):
-                                continue
-                        yield {
-                                'title' : self.tag_to_string(a),
-                                'url'   : 'http://www.wprost.pl' + a['href'],
-                                'date'  : '',
-                                'description' : ''
-                                }
+    def find_articles(self, main_block):
+        """Return the article entries for every usable link in *main_block*.
+
+        Each entry is a dict with 'title', 'url', 'date' and 'description'
+        keys; 'date' and 'description' are always empty strings. Links
+        without an href are skipped, and so are links for which
+        is_blocked() is true while EXCLUDE_LOCKED is set.
+        """
+        articles = []
+        for a in main_block.findAll('a'):
+            href = a.get('href')
+            if not href:
+                continue
+            if self.EXCLUDE_LOCKED and self.is_blocked(a):
+                continue
+            articles.append({
+                'title' : self.tag_to_string(a),
+                'url'   : 'http://www.wprost.pl' + href,
+                'date'  : '',
+                'description' : ''
+            })
+        return articles


--- a/recipes/wprost_rss.recipe
+++ b/recipes/wprost_rss.recipe
@ -0,0 +1,71 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, matek09, matek09@gmail.com'
+__copyright__ = 'Modified 2011,  Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
+__copyright__ = 'Modified 2012,  Artur Stachecki <artur.stachecki@gmail.com>'
+
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Wprost(BasicNewsRecipe):
+    """RSS-based recipe for the Polish weekly 'Wprost'.
+
+    Only the element with id="print-layer" is kept from each article page.
+    The cover is looked up on the magazine's issue page.
+    """
+    title = u'Wprost (RSS)'
+    __author__ = 'matek09'
+    description = 'Weekly magazine'
+    encoding = 'ISO-8859-2'
+    no_stylesheets = True
+    language = 'pl'
+    remove_javascript = True
+    recursions = 0
+    use_embedded_content = False
+    remove_empty_feeds = True
+    remove_tags_before = dict(name = 'div', attrs = {'id' : 'print-layer'})
+    remove_tags_after = dict(name = 'div', attrs = {'id' : 'print-layer'})
+    # Disabled alternative selection, kept for reference:
+    # keep_only_tags =[]
+    # keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
+    # keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
+    # keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
+    # keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))
+
+    # Strip hiding styles and table scaffolding from the raw markup.
+    preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
+        (re.compile(r'display: block;'), lambda match: ''),
+        (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
+        (re.compile(r'\<table .*?\>'), lambda match: ''),
+        (re.compile(r'\<tr>'), lambda match: ''),
+        (re.compile(r'\<td .*?\>'), lambda match: ''),
+        (re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
+
+    remove_tags = [
+        dict(name = 'div', attrs = {'class' : 'def element-date'}),
+        dict(name = 'div', attrs = {'class' : 'def silver'}),
+        dict(name = 'div', attrs = {'id' : 'content-main-column-right'}),
+    ]
+
+    extra_css = '''.div-header {font-size: x-small; font-weight: bold}'''
+    #h2 {font-size: x-large; font-weight: bold}
+
+    feeds = [(u'Tylko u nas', u'http://www.wprost.pl/rss/rss_wprostextra.php'),
+        (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'),
+        (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'),
+        (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'),
+        (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'),
+        (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'),
+        (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'),
+        (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'),
+        (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'),
+        (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'),
+        (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'),
+        (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'),
+        (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'),
+        (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php')
+        ]
+
+    def get_cover_url(self):
+        """Return the cover image URL of the issue shown on the issue page.
+
+        The 'src' of the element with class 'wprost-cover' is stored in
+        self.cover_url and returned. When that element or its 'src' is
+        missing, the existing self.cover_url is returned unchanged.
+        """
+        soup = self.index_to_soup('http://www.wprost.pl/tygodnik')
+        cover = soup.find(attrs={'class':'wprost-cover'})
+        src = cover.get('src') if cover is not None else None
+        if src:
+            self.cover_url = src
+        return self.cover_url
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@ -55,20 +55,14 @@ class WallStreetJournal(BasicNewsRecipe):
                    ]
    remove_tags_after = [dict(id="article_story_body"), {'class':"article story"},]

+    use_javascript_to_login = True

-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            br.open('http://commerce.wsj.com/auth/login')
-            br.select_form(nr=1)
-            br['user']   = self.username
-            br['password'] = self.password
-            res = br.submit()
-            raw = res.read()
-            if 'Welcome,' not in raw and '>Logout<' not in raw and '>Log Out<' not in raw:
-                raise ValueError('Failed to log in to wsj.com, check your '
-                        'username and password')
-        return br
+    def javascript_login(self, br, username, password):
+        """Fill in and submit the first form of the wsj.com stand-alone login page.
+
+        br                 -- browser object handed in by the caller
+        username, password -- credentials written into the form fields of
+                              the same names
+        """
+        login_page = 'https://id.wsj.com/access/pages/wsj/us/login_standalone.html?mg=com-wsj'
+        br.visit(login_page, timeout=120)
+        form = br.select_form(nr=0)
+        form['username'] = username
+        form['password'] = password
+        br.submit(timeout=120)

    def populate_article_metadata(self, article, soup, first):
        if first and hasattr(self, 'add_toc_thumbnail'):
--- a/recipes/zeitde_sub.recipe
+++ b/recipes/zeitde_sub.recipe
@ -88,7 +88,7 @@ class ZeitEPUBAbo(BasicNewsRecipe):
        (re.compile(u' \u00AB'), lambda match: u'\u00AB '), # before closing quotation
        (re.compile(u'\u00BB '), lambda match: u' \u00BB'), # after opening quotation
        # filtering for spaces in large numbers for better readability
-        (re.compile(r'(?<=\d\d)(?=\d\d\d[ ,\.;\)<\?!-])'), lambda match: u'\u2008'), # end of the number with some character following
+        (re.compile(r'(?<=\d\d)(?=\d\d\d[ ,;\)<\?!-])'), lambda match: u'\u2008'), # end of the number with some character following
        (re.compile(r'(?<=\d\d)(?=\d\d\d. )'), lambda match: u'\u2008'), # end of the number with full-stop following, then space is necessary (avoid file names)
        (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
        (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
--- a/resources/content_server/browse/browse.css
+++ b/resources/content_server/browse/browse.css
@ -356,6 +356,10 @@ h2.library_name {
    color: red;
 }

+#booklist a.summary_thumb img {
+    border: none
+}
+
 #booklist > #pagelist { display: none; }

 #goto_page_dialog ul {
@ -474,5 +478,9 @@ h2.library_name {
    color: red
 }

+.details a.details_thumb img {
+    border: none
+}
+
 /* }}} */

--- a/resources/content_server/browse/details.html
+++ b/resources/content_server/browse/details.html
@ -1,6 +1,6 @@
 <div id="details_{id}" class="details">
    <div class="left">
-        <img alt="Cover of {title}" src="{prefix}/get/cover/{id}" />
+        <a href="{get_url}" title="Click to read {title} in the {fmt} format" class="details_thumb"><img alt="Cover of {title}" src="{prefix}/get/cover/{id}" /></a>
    </div>
    <div class="right">
        <div class="field formats">{formats}</div>
--- a/resources/content_server/browse/summary.html
+++ b/resources/content_server/browse/summary.html
@ -1,6 +1,6 @@
 <div id="summary_{id}" class="summary">
    <div class="left">
-        <img alt="Cover of {title}" src="{prefix}/get/thumb_90_120/{id}" />
+        <a href="{get_url}" class="summary_thumb" title="Click to read {title} in the {fmt} format"><img alt="Cover of {title}" src="{prefix}/get/thumb_90_120/{id}" /></a>
        {get_button}
    </div>
    <div class="right">
--- a/resources/content_server/index.html
+++ b/resources/content_server/index.html
@ -1,5 +1,5 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 <?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" version="XHTML 1.1" xml:lang="en">
 	<head>
 		<title>calibre library</title>
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -464,12 +464,15 @@ server_listen_on = '0.0.0.0'
 # on at your own risk!
 unified_title_toolbar_on_osx = False

-#: Save original file when converting from same format to same format
+#: Save original file when converting/polishing from same format to same format
 # When calibre does a conversion from the same format to the same format, for
 # example, from EPUB to EPUB, the original file is saved, so that in case the
 # conversion is poor, you can tweak the settings and run it again. By setting
 # this to False you can prevent calibre from saving the original file.
+# Similarly, by setting save_original_format_when_polishing to False you can
+# prevent calibre from saving the original file when polishing.
 save_original_format = True
+save_original_format_when_polishing = True

 #: Number of recently viewed books to show
 # Right-clicking the View button shows a list of recently viewed books. Control
--- a/setup/check.py
+++ b/setup/check.py
@ -95,9 +95,9 @@ class Check(Command):
                    errors = True
                    self.report_errors(w)
            else:
+                from calibre.utils.serve_coffee import check_coffeescript
                try:
-                    subprocess.check_call(['coffee', '-c', '-p', f],
-                            stdout=open(os.devnull, 'wb'))
+                   check_coffeescript(f)
                except:
                    errors = True
            if errors:
--- a/setup/iso_639/ca.po
+++ b/setup/iso_639/ca.po
@ -12,14 +12,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2013-01-19 20:28+0000\n"
+"PO-Revision-Date: 2013-02-19 18:01+0000\n"
 "Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2013-01-20 04:36+0000\n"
-"X-Generator: Launchpad (build 16430)\n"
+"X-Launchpad-Export-Date: 2013-02-20 04:50+0000\n"
+"X-Generator: Launchpad (build 16491)\n"
 "Language: ca\n"

 #. name for aaa
@ -1920,7 +1920,7 @@ msgstr "Arára; Mato Grosso"

 #. name for axk
 msgid "Yaka (Central African Republic)"
-msgstr "Yaka (República Centreafricana)"
+msgstr "Yaka (República Centrafricana)"

 #. name for axm
 msgid "Armenian; Middle"
@ -3528,7 +3528,7 @@ msgstr "Buamu"

 #. name for boy
 msgid "Bodo (Central African Republic)"
-msgstr "Bodo (República Centreafricana)"
+msgstr "Bodo (República Centrafricana)"

 #. name for boz
 msgid "Bozo; Tiéyaxo"
@ -7928,7 +7928,7 @@ msgstr "Oromo; occidental"

 #. name for gba
 msgid "Gbaya (Central African Republic)"
-msgstr "Gbaya (República Centreafricana)"
+msgstr "Gbaya (República Centrafricana)"

 #. name for gbb
 msgid "Kaytetye"
@ -11184,7 +11184,7 @@ msgstr ""

 #. name for kbn
 msgid "Kare (Central African Republic)"
-msgstr "Kare (República Centreafricana)"
+msgstr "Kare (República Centrafricana)"

 #. name for kbo
 msgid "Keliko"
@ -20720,7 +20720,7 @@ msgstr "Pitjantjatjara"

 #. name for pka
 msgid "Prākrit; Ardhamāgadhī"
-msgstr ""
+msgstr "Pràcrit; Ardhamagadhi"

 #. name for pkb
 msgid "Pokomo"
@ -20776,31 +20776,31 @@ msgstr "Polonombauk"

 #. name for plc
 msgid "Palawano; Central"
-msgstr ""
+msgstr "Palawà; Central"

 #. name for pld
 msgid "Polari"
-msgstr ""
+msgstr "Polari"

 #. name for ple
 msgid "Palu'e"
-msgstr ""
+msgstr "Palue"

 #. name for plg
 msgid "Pilagá"
-msgstr ""
+msgstr "Pilagà"

 #. name for plh
 msgid "Paulohi"
-msgstr ""
+msgstr "Paulohi"

 #. name for pli
 msgid "Pali"
-msgstr ""
+msgstr "Pali"

 #. name for plj
 msgid "Polci"
-msgstr ""
+msgstr "Polci"

 #. name for plk
 msgid "Shina; Kohistani"
@ -20812,19 +20812,19 @@ msgstr "Palaung; Shwe"

 #. name for pln
 msgid "Palenquero"
-msgstr ""
+msgstr "Palenquero"

 #. name for plo
 msgid "Popoluca; Oluta"
-msgstr ""
+msgstr "Popoluca; Oluta"

 #. name for plp
 msgid "Palpa"
-msgstr ""
+msgstr "Palpa"

 #. name for plq
 msgid "Palaic"
-msgstr ""
+msgstr "Palaic"

 #. name for plr
 msgid "Senoufo; Palaka"
@ -20840,15 +20840,15 @@ msgstr "Malgaix; Plateau"

 #. name for plu
 msgid "Palikúr"
-msgstr ""
+msgstr "Palikur"

 #. name for plv
 msgid "Palawano; Southwest"
-msgstr ""
+msgstr "Palawà; Sudoccidental"

 #. name for plw
 msgid "Palawano; Brooke's Point"
-msgstr ""
+msgstr "Palawà; Brooke"

 #. name for ply
 msgid "Bolyu"
@ -20856,43 +20856,43 @@ msgstr ""

 #. name for plz
 msgid "Paluan"
-msgstr ""
+msgstr "Paluà"

 #. name for pma
 msgid "Paama"
-msgstr ""
+msgstr "Paama"

 #. name for pmb
 msgid "Pambia"
-msgstr ""
+msgstr "Pambia"

 #. name for pmc
 msgid "Palumata"
-msgstr ""
+msgstr "Palumata"

 #. name for pme
 msgid "Pwaamei"
-msgstr ""
+msgstr "Pwaamei"

 #. name for pmf
 msgid "Pamona"
-msgstr ""
+msgstr "Pamona"

 #. name for pmh
 msgid "Prākrit; Māhārāṣṭri"
-msgstr ""
+msgstr "Pràcrit; Maharastri"

 #. name for pmi
 msgid "Pumi; Northern"
-msgstr ""
+msgstr "Pumi; Septentrional"

 #. name for pmj
 msgid "Pumi; Southern"
-msgstr ""
+msgstr "Pumi; Meridional"

 #. name for pmk
 msgid "Pamlico"
-msgstr ""
+msgstr "Algonquí Carolina"

 #. name for pml
 msgid "Lingua Franca"
@ -20904,11 +20904,11 @@ msgstr "Pol"

 #. name for pmn
 msgid "Pam"
-msgstr ""
+msgstr "Pam"

 #. name for pmo
 msgid "Pom"
-msgstr ""
+msgstr "Pom"

 #. name for pmq
 msgid "Pame; Northern"
@ -20916,11 +20916,11 @@ msgstr "Pame; Septentrional"

 #. name for pmr
 msgid "Paynamar"
-msgstr ""
+msgstr "Paynamar"

 #. name for pms
 msgid "Piemontese"
-msgstr ""
+msgstr "Piemontès"

 #. name for pmt
 msgid "Tuamotuan"
@ -20956,7 +20956,7 @@ msgstr "Panjabi; Occidental"

 #. name for pnc
 msgid "Pannei"
-msgstr ""
+msgstr "Pannei"

 #. name for pne
 msgid "Penan; Western"
@ -20964,11 +20964,11 @@ msgstr "Penan; Occidental"

 #. name for png
 msgid "Pongu"
-msgstr ""
+msgstr "Pongu"

 #. name for pnh
 msgid "Penrhyn"
-msgstr ""
+msgstr "Penrhyn"

 #. name for pni
 msgid "Aoheng"
@ -20976,27 +20976,27 @@ msgstr ""

 #. name for pnm
 msgid "Punan Batu 1"
-msgstr ""
+msgstr "Punan Batu"

 #. name for pnn
 msgid "Pinai-Hagahai"
-msgstr ""
+msgstr "Pinai-Hagahai"

 #. name for pno
 msgid "Panobo"
-msgstr ""
+msgstr "Panobo"

 #. name for pnp
 msgid "Pancana"
-msgstr ""
+msgstr "Pancana"

 #. name for pnq
 msgid "Pana (Burkina Faso)"
-msgstr ""
+msgstr "Pana (Burkina Faso)"

 #. name for pnr
 msgid "Panim"
-msgstr ""
+msgstr "Panim"

 #. name for pns
 msgid "Ponosakan"
@ -21028,7 +21028,7 @@ msgstr ""

 #. name for pnz
 msgid "Pana (Central African Republic)"
-msgstr ""
+msgstr "Pana (República Centrafricana)"

 #. name for poc
 msgid "Poqomam"
@ -21056,7 +21056,7 @@ msgstr ""

 #. name for poi
 msgid "Popoluca; Highland"
-msgstr ""
+msgstr "Popoluca; Muntanya"

 #. name for pok
 msgid "Pokangá"
@ -21084,7 +21084,7 @@ msgstr ""

 #. name for poq
 msgid "Popoluca; Texistepec"
-msgstr ""
+msgstr "Popoluca; Texistepec"

 #. name for por
 msgid "Portuguese"
@ -21092,7 +21092,7 @@ msgstr "Portuguès"

 #. name for pos
 msgid "Popoluca; Sayula"
-msgstr ""
+msgstr "Popoluca; Sayula"

 #. name for pot
 msgid "Potawatomi"
@ -21336,7 +21336,7 @@ msgstr "Paixtú; Central"

 #. name for psu
 msgid "Prākrit; Sauraseni"
-msgstr ""
+msgstr "Pràcrit; Sauraseni"

 #. name for psw
 msgid "Port Sandwich"
--- a/setup/iso_639/pt.po
+++ b/setup/iso_639/pt.po
@ -10,19 +10,19 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2011-09-27 16:52+0000\n"
-"Last-Translator: Kovid Goyal <Unknown>\n"
+"PO-Revision-Date: 2013-02-18 02:41+0000\n"
+"Last-Translator: pedro jorge oliveira <pedrojorgeoliveira93@gmail.com>\n"
 "Language-Team: Portuguese <pt@li.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-11-26 05:34+0000\n"
-"X-Generator: Launchpad (build 14381)\n"
+"X-Launchpad-Export-Date: 2013-02-19 04:56+0000\n"
+"X-Generator: Launchpad (build 16491)\n"
 "Language: pt\n"

 #. name for aaa
 msgid "Ghotuo"
-msgstr ""
+msgstr "Ghotuo"

 #. name for aab
 msgid "Alumu-Tesu"
@ -498,7 +498,7 @@ msgstr ""

 #. name for afr
 msgid "Afrikaans"
-msgstr "Africanos"
+msgstr "Africano"

 #. name for afs
 msgid "Creole; Afro-Seminole"
@ -910,7 +910,7 @@ msgstr ""

 #. name for ale
 msgid "Aleut"
-msgstr "aleúte"
+msgstr "Aleúte"

 #. name for alf
 msgid "Alege"
@ -30818,7 +30818,7 @@ msgstr ""

 #. name for zxx
 msgid "No linguistic content"
-msgstr ""
+msgstr "Sem conteúdo linguistico"

 #. name for zyb
 msgid "Zhuang; Yongbei"
--- a/setup/iso_639/pt_BR.po
+++ b/setup/iso_639/pt_BR.po
@ -9,14 +9,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-12-21 03:31+0000\n"
-"Last-Translator: Fábio Malcher Miranda <mirand863@hotmail.com>\n"
+"PO-Revision-Date: 2013-02-17 21:57+0000\n"
+"Last-Translator: Neliton Pereira Jr. <nelitonpjr@gmail.com>\n"
 "Language-Team: Brazilian Portuguese\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-12-22 04:59+0000\n"
-"X-Generator: Launchpad (build 16378)\n"
+"X-Launchpad-Export-Date: 2013-02-18 04:49+0000\n"
+"X-Generator: Launchpad (build 16491)\n"
 "Language: \n"

 #. name for aaa
@ -141,7 +141,7 @@ msgstr ""

 #. name for abh
 msgid "Arabic; Tajiki"
-msgstr ""
+msgstr "Arábico; Tajiki"

 #. name for abi
 msgid "Abidji"
--- a/setup/iso_639/vi.po
+++ b/setup/iso_639/vi.po
@ -9,43 +9,43 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2011-09-27 16:56+0000\n"
-"Last-Translator: Clytie Siddall <clytie@riverland.net.au>\n"
+"PO-Revision-Date: 2013-02-15 06:39+0000\n"
+"Last-Translator: baduong <Unknown>\n"
 "Language-Team: Vietnamese <gnomevi-list@lists.sourceforge.net>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-11-26 05:44+0000\n"
-"X-Generator: Launchpad (build 14381)\n"
+"X-Launchpad-Export-Date: 2013-02-16 04:56+0000\n"
+"X-Generator: Launchpad (build 16491)\n"
 "Language: vi\n"

 #. name for aaa
 msgid "Ghotuo"
-msgstr ""
+msgstr "Ghotuo"

 #. name for aab
 msgid "Alumu-Tesu"
-msgstr ""
+msgstr "Alumu-Tesu"

 #. name for aac
 msgid "Ari"
-msgstr ""
+msgstr "Ari"

 #. name for aad
 msgid "Amal"
-msgstr ""
+msgstr "Amal"

 #. name for aae
 msgid "Albanian; Arbëreshë"
-msgstr ""
+msgstr "An-ba-ni"

 #. name for aaf
 msgid "Aranadan"
-msgstr ""
+msgstr "Aranadan"

 #. name for aag
 msgid "Ambrak"
-msgstr ""
+msgstr "Ambrak"

 #. name for aah
 msgid "Arapesh; Abu'"
@ -30817,7 +30817,7 @@ msgstr ""

 #. name for zxx
 msgid "No linguistic content"
-msgstr ""
+msgstr "Không có nội dung kiểu ngôn ngữ"

 #. name for zyb
 msgid "Zhuang; Yongbei"
@ -30829,11 +30829,11 @@ msgstr ""

 #. name for zyj
 msgid "Zhuang; Youjiang"
-msgstr ""
+msgstr "Zhuang; Youjiang"

 #. name for zyn
 msgid "Zhuang; Yongnan"
-msgstr ""
+msgstr "Zhuang; Yongnan"

 #. name for zyp
 msgid "Zyphe"
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 9, 18)
+numeric_version = (0, 9, 20)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -624,12 +624,6 @@ from calibre.library.catalogs.epub_mobi import EPUB_MOBI
 plugins += [CSV_XML, BIBTEX, EPUB_MOBI]
 # }}}

-# EPUB Fix plugins {{{
-from calibre.ebooks.epub.fix.unmanifested import Unmanifested
-from calibre.ebooks.epub.fix.epubcheck import Epubcheck
-plugins += [Unmanifested, Epubcheck]
-# }}}
-
 # Profiles {{{
 from calibre.customize.profiles import input_profiles, output_profiles
 plugins += input_profiles + output_profiles
@ -790,11 +784,11 @@ class ActionConvert(InterfaceActionBase):
    actual_plugin = 'calibre.gui2.actions.convert:ConvertAction'
    description = _('Convert books to various ebook formats')

-# class ActionPolish(InterfaceActionBase):
-#     name = 'Polish Books'
-#     actual_plugin = 'calibre.gui2.actions.polish:PolishAction'
-#     description = _('Fine tune your ebooks')
-#
+class ActionPolish(InterfaceActionBase):  # Plugin descriptor for the 'Polish Books' interface action.
+    name = 'Polish Books'
+    actual_plugin = 'calibre.gui2.actions.polish:PolishAction'  # 'module.path:ClassName' locator, same form as the sibling Action* descriptors
+    description = _('Fine tune your ebooks')
+
 class ActionDelete(InterfaceActionBase):
    name = 'Remove Books'
    actual_plugin = 'calibre.gui2.actions.delete:DeleteAction'
@ -930,7 +924,7 @@ class ActionPluginUpdater(InterfaceActionBase):

 plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
        ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
-        ActionFetchNews, ActionSaveToDisk, ActionQuickview, #ActionPolish,
+        ActionFetchNews, ActionSaveToDisk, ActionQuickview, ActionPolish,
        ActionShowBookDetails,ActionRestart, ActionOpenFolder, ActionConnectShare,
        ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
        ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -17,7 +17,6 @@ from calibre.devices.interface import DevicePlugin
 from calibre.ebooks.metadata import MetaInformation
 from calibre.utils.config import (make_config_dir, Config, ConfigProxy,
                                 plugin_dir, OptionParser)
-from calibre.ebooks.epub.fix import ePubFixer
 from calibre.ebooks.metadata.sources.base import Source
 from calibre.constants import DEBUG

@ -489,15 +488,6 @@ def disabled_device_plugins():
                    yield plugin
 # }}}

-# epub fixers {{{
-def epub_fixers():
-    for plugin in _initialized_plugins:
-        if isinstance(plugin, ePubFixer):
-            if not is_disabled(plugin):
-                if platform in plugin.supported_platforms:
-                    yield plugin
-# }}}
-
 # Metadata sources2 {{{
 def metadata_plugins(capabilities):
    capabilities = frozenset(capabilities)
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@ -16,15 +16,14 @@ import apsw
 from calibre import isbytestring, force_unicode, prints
 from calibre.constants import (iswindows, filesystem_encoding,
        preferred_encoding)
-from calibre.ptempfile import PersistentTemporaryFile, SpooledTemporaryFile
-from calibre.db import SPOOL_SIZE
+from calibre.ptempfile import PersistentTemporaryFile
 from calibre.db.schema_upgrades import SchemaUpgrade
 from calibre.library.field_metadata import FieldMetadata
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
 from calibre.utils.icu import strcmp
 from calibre.utils.config import to_json, from_json, prefs, tweaks
 from calibre.utils.date import utcfromtimestamp, parse_date
-from calibre.utils.filenames import is_case_sensitive
+from calibre.utils.filenames import (is_case_sensitive, samefile, hardlink_file)
 from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
        SizeTable, FormatsTable, AuthorsTable, IdentifiersTable,
        CompositeTable, LanguagesTable)
@ -855,38 +854,75 @@ class DB(object):
        ans = {}
        if path is not None:
            stat = os.stat(path)
+            ans['path'] = path
            ans['size'] = stat.st_size
            ans['mtime'] = utcfromtimestamp(stat.st_mtime)
        return ans

-    def cover(self, path, as_file=False, as_image=False,
-            as_path=False):
+    def has_format(self, book_id, fmt, fname, path):  # True iff format_abspath() resolves this format to a path (i.e. does not return None).
+        return self.format_abspath(book_id, fmt, fname, path) is not None
+
+    def copy_cover_to(self, path, dest, windows_atomic_move=None, use_hardlink=False):
        path = os.path.join(self.library_path, path, 'cover.jpg')
-        ret = None
-        if os.access(path, os.R_OK):
-            try:
+        if windows_atomic_move is not None:
+            if not isinstance(dest, basestring):
+                raise Exception("Error, you must pass the dest as a path when"
+                        " using windows_atomic_move")
+            if os.access(path, os.R_OK) and dest and not samefile(dest, path):
+                windows_atomic_move.copy_path_to(path, dest)
+                return True
+        else:
+            if os.access(path, os.R_OK):
+                try:
+                    f = lopen(path, 'rb')
+                except (IOError, OSError):
+                    time.sleep(0.2)
                f = lopen(path, 'rb')
-            except (IOError, OSError):
-                time.sleep(0.2)
-                f = lopen(path, 'rb')
-            with f:
-                if as_path:
-                    pt = PersistentTemporaryFile('_dbcover.jpg')
-                    with pt:
-                        shutil.copyfileobj(f, pt)
-                    return pt.name
-                if as_file:
-                    ret = SpooledTemporaryFile(SPOOL_SIZE)
-                    shutil.copyfileobj(f, ret)
-                    ret.seek(0)
-                else:
-                    ret = f.read()
-                    if as_image:
-                        from PyQt4.Qt import QImage
-                        i = QImage()
-                        i.loadFromData(ret)
-                        ret = i
-        return ret
+                with f:
+                    if hasattr(dest, 'write'):
+                        shutil.copyfileobj(f, dest)
+                        if hasattr(dest, 'flush'):
+                            dest.flush()
+                        return True
+                    elif dest and not samefile(dest, path):
+                        if use_hardlink:
+                            try:
+                                hardlink_file(path, dest)
+                                return True
+                            except:
+                                pass
+                        with lopen(dest, 'wb') as d:
+                            shutil.copyfileobj(f, d)
+                        return True
+        return False
+
+    def copy_format_to(self, book_id, fmt, fname, path, dest,
+                       windows_atomic_move=None, use_hardlink=False):  # Copy a format file to dest (file-like object or path); returns False only when the file cannot be located.
+        path = self.format_abspath(book_id, fmt, fname, path)  # rebinds path to whatever format_abspath() returns for this format
+        if path is None:
+            return False
+        if windows_atomic_move is not None:  # this mode only accepts a path string for dest
+            if not isinstance(dest, basestring):
+                raise Exception("Error, you must pass the dest as a path when"
+                        " using windows_atomic_move")
+            if dest and not samefile(dest, path):  # skip when dest is empty or is the source file itself
+                windows_atomic_move.copy_path_to(path, dest)
+        else:
+            if hasattr(dest, 'write'):  # dest is treated as a writable file-like object
+                with lopen(path, 'rb') as f:
+                    shutil.copyfileobj(f, dest)
+                if hasattr(dest, 'flush'):
+                    dest.flush()
+            elif dest and not samefile(dest, path):  # dest is treated as a path
+                if use_hardlink:
+                    try:
+                        hardlink_file(path, dest)
+                        return True
+                    except:  # any hardlink failure falls through to the plain byte copy below
+                        pass
+                with lopen(path, 'rb') as f, lopen(dest, 'wb') as d:
+                    shutil.copyfileobj(f, d)
+        return True  # NOTE: also reached when the copy was skipped because dest is empty or the same file

   # }}}

--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@ -8,16 +8,21 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, traceback
+from io import BytesIO
 from collections import defaultdict
 from functools import wraps, partial

+from calibre.db import SPOOL_SIZE
 from calibre.db.categories import get_categories
 from calibre.db.locking import create_locks, RecordLock
+from calibre.db.errors import NoSuchFormat
 from calibre.db.fields import create_field
 from calibre.db.search import Search
 from calibre.db.tables import VirtualTable
 from calibre.db.lazy import FormatMetadata, FormatsList
 from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
+                               SpooledTemporaryFile)
 from calibre.utils.date import now
 from calibre.utils.icu import sort_key

@ -103,27 +108,6 @@ class Cache(object):
    def field_metadata(self):
        return self.backend.field_metadata

-    def _format_abspath(self, book_id, fmt):
-        '''
-        Return absolute path to the ebook file of format `format`
-
-        WARNING: This method will return a dummy path for a network backend DB,
-        so do not rely on it, use format(..., as_path=True) instead.
-
-        Currently used only in calibredb list, the viewer and the catalogs (via
-        get_data_as_dict()).
-
-        Apart from the viewer, I don't believe any of the others do any file
-        I/O with the results of this call.
-        '''
-        try:
-            name = self.fields['formats'].format_fname(book_id, fmt)
-            path = self._field_for('path', book_id).replace('/', os.sep)
-        except:
-            return None
-        if name and path:
-            return self.backend.format_abspath(book_id, fmt, name, path)
-
    def _get_metadata(self, book_id, get_user_categories=True): # {{{
        mi = Metadata(None, template_cache=self.formatter_template_cache)
        author_ids = self._field_ids_for('authors', book_id)
@ -162,7 +146,7 @@ class Cache(object):
        if not formats:
            good_formats = None
        else:
-            mi.format_metadata = FormatMetadata(self, id, formats)
+            mi.format_metadata = FormatMetadata(self, book_id, formats)
            good_formats = FormatsList(formats, mi.format_metadata)
        mi.formats = good_formats
        mi.has_cover = _('Yes') if self._field_for('cover', book_id,
@ -227,6 +211,12 @@ class Cache(object):
            self.fields['ondevice'] = create_field('ondevice',
                    VirtualTable('ondevice'))

+            for name, field in self.fields.iteritems():
+                if name[0] == '#' and name.endswith('_index'):
+                    field.series_field = self.fields[name[:-len('_index')]]
+                elif name == 'series_index':
+                    field.series_field = self.fields['series']
+
    @read_api
    def field_for(self, name, book_id, default_value=None):
        '''
@ -397,15 +387,184 @@ class Cache(object):
        :param as_path: If True return the image as a path pointing to a
                        temporary file
        '''
+        if as_file:
+            ret = SpooledTemporaryFile(SPOOL_SIZE)
+            if not self.copy_cover_to(book_id, ret): return
+            ret.seek(0)
+        elif as_path:
+            pt = PersistentTemporaryFile('_dbcover.jpg')
+            with pt:
+                if not self.copy_cover_to(book_id, pt): return
+            ret = pt.name
+        else:
+            buf = BytesIO()
+            if not self.copy_cover_to(book_id, buf): return
+            ret = buf.getvalue()
+            if as_image:
+                from PyQt4.Qt import QImage
+                i = QImage()
+                i.loadFromData(ret)
+                ret = i
+        return ret
+
+    @api
+    def copy_cover_to(self, book_id, dest, use_hardlink=False):
+        '''
+        Copy the cover to the file like object ``dest``. Returns False
+        if no cover exists or dest is the same file as the current cover.
+        dest can also be a path in which case the cover is
+        copied to it iff the path is different from the current path (taking
+        case sensitivity into account).
+        '''
        with self.read_lock:
            try:
                path = self._field_for('path', book_id).replace('/', os.sep)
            except:
-                return None
+                return False

        with self.record_lock.lock(book_id):
-            return self.backend.cover(path, as_file=as_file, as_image=as_image,
-                    as_path=as_path)
+            return self.backend.copy_cover_to(path, dest,
+                                              use_hardlink=use_hardlink)
+
+    @api
+    def copy_format_to(self, book_id, fmt, dest, use_hardlink=False):
+        '''
+        Copy the format ``fmt`` to the file like object ``dest``. If the
+        specified format does not exist, raises :class:`NoSuchFormat` error.
+        dest can also be a path, in which case the format is copied to it, iff
+        the path is different from the current path (taking case sensitivity
+        into account).
+        '''
+        with self.read_lock:  # only the name/path lookup happens under the read lock
+            try:
+                name = self.fields['formats'].format_fname(book_id, fmt)
+                path = self._field_for('path', book_id).replace('/', os.sep)
+            except:  # any lookup failure is reported as NoSuchFormat
+                raise NoSuchFormat('Record %d has no %s file'%(book_id, fmt))
+
+        with self.record_lock.lock(book_id):  # the copy itself runs under the per-book record lock
+            return self.backend.copy_format_to(book_id, fmt, name, path, dest,
+                                               use_hardlink=use_hardlink)  # NOTE(review): the backend returns False (does not raise) when it cannot resolve the file path
+
+    @read_api
+    def format_abspath(self, book_id, fmt):
+        '''
+        Return absolute path to the ebook file of format `format`
+
+        Currently used only in calibredb list, the viewer and the catalogs (via
+        get_data_as_dict()).
+
+        Apart from the viewer, I don't believe any of the others do any file
+        I/O with the results of this call.
+        '''
+        try:
+            name = self.fields['formats'].format_fname(book_id, fmt)
+            path = self._field_for('path', book_id).replace('/', os.sep)
+        except:  # any lookup failure is treated as "no such format"
+            return None
+        if name and path:  # otherwise control falls off the end and None is returned implicitly
+            return self.backend.format_abspath(book_id, fmt, name, path)
+
+    @read_api
+    def has_format(self, book_id, fmt):
+        'Return True iff the format exists on disk'
+        try:
+            name = self.fields['formats'].format_fname(book_id, fmt)
+            path = self._field_for('path', book_id).replace('/', os.sep)
+        except:  # a failed name/path lookup is reported as "format not present"
+            return False
+        return self.backend.has_format(book_id, fmt, name, path)  # the actual check is delegated to the backend
+
+    @read_api
+    def formats(self, book_id, verify_formats=True):
+        '''
+        Return tuple of all formats for the specified book. If verify_formats
+        is True, verifies that the files exist on disk.
+        '''
+        ans = self.field_for('formats', book_id)  # NOTE(review): sibling @read_api methods call the underscore-prefixed self._field_for; confirm the public variant is intended here
+        if verify_formats and ans:
+            try:
+                path = self._field_for('path', book_id).replace('/', os.sep)
+            except:
+                return ()  # the book's path could not be determined: report no formats
+            def verify(fmt):  # True iff the backend reports this format's file as present
+                try:
+                    name = self.fields['formats'].format_fname(book_id, fmt)
+                except:
+                    return False
+                return self.backend.has_format(book_id, fmt, name, path)
+
+            ans = tuple(x for x in ans if verify(x))  # keep only the formats that pass verification
+        return ans
+
+    @api
+    def format(self, book_id, fmt, as_file=False, as_path=False, preserve_filename=False):
+        '''
+        Return the ebook format as a bytestring or `None` if the format doesn't exist,
+        or we don't have permission to write to the ebook file.
+
+        :param as_file: If True the ebook format is returned as a file object. Note
+                        that the file object is a SpooledTemporaryFile, so if what you want to
+                        do is copy the format to another file, use :method:`copy_format_to`
+                        instead for performance.
+        :param as_path: Copies the format file to a temp file and returns the
+                        path to the temp file
+        :param preserve_filename: If True and returning a path the filename is
+                                  the same as that used in the library. Note that using
+                                  this means that repeated calls yield the same
+                                  temp file (which is re-created each time)
+        '''
+        with self.read_lock:  # only the file-name lookup happens under the read lock
+            ext = ('.'+fmt.lower()) if fmt else ''  # e.g. '.epub'; empty when fmt is falsy
+            try:
+                fname = self.fields['formats'].format_fname(book_id, fmt)
+            except:
+                return None
+            fname += ext
+
+        if as_path:  # as_path takes precedence over as_file
+            if preserve_filename:
+                bd = base_dir()
+                d = os.path.join(bd, 'format_abspath')
+                try:
+                    os.makedirs(d)
+                except:  # best effort: any failure creating the directory is ignored here
+                    pass
+                ret = os.path.join(d, fname)  # fixed location, so repeated calls target the same file
+                with self.record_lock.lock(book_id):  # NOTE(review): copy_format_to() acquires this record lock again; relies on the lock being re-entrant -- confirm
+                    try:
+                        self.copy_format_to(book_id, fmt, ret)
+                    except NoSuchFormat:
+                        return None
+            else:
+                with PersistentTemporaryFile(ext) as pt, self.record_lock.lock(book_id):
+                    try:
+                        self.copy_format_to(book_id, fmt, pt)
+                    except NoSuchFormat:
+                        return None  # NOTE(review): the temporary file created above is not removed on this path
+                    ret = pt.name
+        elif as_file:
+            ret = SpooledTemporaryFile(SPOOL_SIZE)
+            with self.record_lock.lock(book_id):
+                try:
+                    self.copy_format_to(book_id, fmt, ret)
+                except NoSuchFormat:
+                    return None
+            ret.seek(0)  # rewind so the caller reads from the start
+            # Various bits of code try to use the name as the default
+            # title when reading metadata, so set it
+            ret.name = fname
+        else:
+            buf = BytesIO()  # default mode: collect the whole file in memory and return it as bytes
+            with self.record_lock.lock(book_id):
+                try:
+                    self.copy_format_to(book_id, fmt, buf)
+                except NoSuchFormat:
+                    return None
+
+            ret = buf.getvalue()
+
+        return ret

    @read_api
    def multisort(self, fields, ids_to_sort=None):
@ -455,6 +614,14 @@ class Cache(object):
        return get_categories(self, sort=sort, book_ids=book_ids,
                              icon_map=icon_map)

+    @write_api
+    def set_field(self, name, book_id_to_val_map):
+        '''
+        Write new values for the field ``name``.
+
+        :param name: Key into ``self.fields`` identifying the field to write
+        :param book_id_to_val_map: Mapping of book id to the new value
+        :return: Whatever the field writer's ``set_books()`` returns
+        '''
+        # TODO: Specialize title/authors to also update path
+        # TODO: Handle updating caches used by composite fields
+        writer = self.fields[name].writer
+        return writer.set_books(book_id_to_val_map, self.backend)
+
    # }}}

 class SortKey(object):
--- a/src/calibre/db/categories.py
+++ b/src/calibre/db/categories.py
@ -12,6 +12,7 @@ from functools import partial
 from operator import attrgetter
 from future_builtins import map

+from calibre.ebooks.metadata import author_to_author_sort
 from calibre.library.field_metadata import TagsIcons
 from calibre.utils.config_base import tweaks
 from calibre.utils.icu import sort_key
@ -149,8 +150,16 @@ def get_categories(dbcache, sort='name', book_ids=None, icon_map=None):
        elif category == 'news':
            cats = dbcache.fields['tags'].get_news_category(tag_class, book_ids)
        else:
+            cat = fm[category]
+            brm = book_rating_map
+            if cat['datatype'] == 'rating' and category != 'rating':
+                brm = dbcache.fields[category].book_value_map
            cats = dbcache.fields[category].get_categories(
-                tag_class, book_rating_map, lang_map, book_ids)
+                tag_class, brm, lang_map, book_ids)
+            if (category != 'authors' and cat['datatype'] == 'text' and
+                cat['is_multiple'] and cat['display'].get('is_names', False)):
+                for item in cats:
+                    item.sort = author_to_author_sort(item.sort)
        sort_categories(cats, sort)
        categories[category] = cats

--- a/src/calibre/db/fields.py
+++ b/src/calibre/db/fields.py
@ -12,6 +12,7 @@ from threading import Lock
 from collections import defaultdict, Counter

 from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
+from calibre.db.write import Writer
 from calibre.ebooks.metadata import title_sort
 from calibre.utils.config_base import tweaks
 from calibre.utils.icu import sort_key
@ -44,6 +45,8 @@ class Field(object):
            self.category_formatter = lambda x:'\u2605'*int(x/2)
        elif name == 'languages':
            self.category_formatter = calibre_langcode_to_name
+        self.writer = Writer(self)
+        self.series_field = None

    @property
    def metadata(self):
--- a/src/calibre/db/tests/base.py
+++ b/src/calibre/db/tests/base.py
@ -7,19 +7,36 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import unittest, os, shutil
+import unittest, os, shutil, tempfile, atexit
+from functools import partial
+from io import BytesIO
 from future_builtins import map

+rmtree = partial(shutil.rmtree, ignore_errors=True)
+
 class BaseTest(unittest.TestCase):

+    def setUp(self):
+        'Create a fresh test library in a new temporary directory'
+        library_dir = self.mkdtemp()
+        self.library_path = library_dir
+        self.create_db(library_dir)
+
+    def tearDown(self):
+        'Delete the library directory stored in self.library_path'
+        library_dir = self.library_path
+        shutil.rmtree(library_dir)
+
    def create_db(self, library_path):
        from calibre.library.database2 import LibraryDatabase2
        if LibraryDatabase2.exists_at(library_path):
            raise ValueError('A library already exists at %r'%library_path)
        src = os.path.join(os.path.dirname(__file__), 'metadata.db')
-        db = os.path.join(library_path, 'metadata.db')
-        shutil.copyfile(src, db)
-        return db
+        dest = os.path.join(library_path, 'metadata.db')
+        shutil.copyfile(src, dest)
+        db = LibraryDatabase2(library_path)
+        db.set_cover(1, I('lt.png', data=True))
+        db.set_cover(2, I('polish.png', data=True))
+        db.add_format(1, 'FMT1', BytesIO(b'book1fmt1'), index_is_id=True)
+        db.add_format(1, 'FMT2', BytesIO(b'book1fmt2'), index_is_id=True)
+        db.add_format(2, 'FMT1', BytesIO(b'book2fmt1'), index_is_id=True)
+        return dest

    def init_cache(self, library_path):
        from calibre.db.backend import DB
@ -29,20 +46,38 @@ class BaseTest(unittest.TestCase):
        cache.init()
        return cache

+    def mkdtemp(self):
+        '''
+        Create a temporary directory (prefix ``db_test_``) and register it
+        for removal at interpreter exit.
+
+        :return: Path of the new directory
+        '''
+        path = tempfile.mkdtemp(prefix='db_test_')
+        atexit.register(rmtree, path)
+        return path
+
+    def init_old(self, library_path):
+        'Open the library at library_path with the LibraryDatabase2 class'
+        from calibre.library.database2 import LibraryDatabase2
+        old_db = LibraryDatabase2(library_path)
+        return old_db
+
+    def clone_library(self, library_path):
+        '''
+        Copy the directory tree at ``library_path`` into a numbered
+        sub-directory of a per-instance scratch directory.
+
+        The scratch directory is created on first use and registered for
+        removal at interpreter exit. Clones are named 1, 2, 3, ...
+
+        :return: Path of the new copy
+        '''
+        if hasattr(self, 'clone_dir'):
+            self.clone_count += 1
+        else:
+            self.clone_dir = tempfile.mkdtemp()
+            atexit.register(rmtree, self.clone_dir)
+            self.clone_count = 1
+        target = os.path.join(self.clone_dir, str(self.clone_count))
+        shutil.copytree(library_path, target)
+        return target
+
    def compare_metadata(self, mi1, mi2):
        allfk1 = mi1.all_field_keys()
        allfk2 = mi2.all_field_keys()
        self.assertEqual(allfk1, allfk2)

        all_keys = {'format_metadata', 'id', 'application_id',
-                'author_sort_map', 'author_link_map', 'book_size',
-                'ondevice_col', 'last_modified'}.union(allfk1)
+                    'author_sort_map', 'author_link_map', 'book_size',
+                    'ondevice_col', 'last_modified', 'has_cover',
+                    'cover_data'}.union(allfk1)
        for attr in all_keys:
            if attr == 'user_metadata': continue
-            if attr == 'format_metadata': continue # TODO: Not implemented yet
            attr1, attr2 = getattr(mi1, attr), getattr(mi2, attr)
            if attr == 'formats':
-                continue # TODO: Not implemented yet
                attr1, attr2 = map(lambda x:tuple(x) if x else (), (attr1, attr2))
            self.assertEqual(attr1, attr2,
                    '%s not the same: %r != %r'%(attr, attr1, attr2))
--- a/src/calibre/db/tests/reading.py
+++ b/src/calibre/db/tests/reading.py
@ -7,21 +7,13 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import shutil, unittest, tempfile, datetime
-from cStringIO import StringIO
+import unittest, datetime

 from calibre.utils.date import utc_tz
 from calibre.db.tests.base import BaseTest

 class ReadingTest(BaseTest):

-    def setUp(self):
-        self.library_path = tempfile.mkdtemp()
-        self.create_db(self.library_path)
-
-    def tearDown(self):
-        shutil.rmtree(self.library_path)
-
    def test_read(self): # {{{
        'Test the reading of data from the database'
        cache = self.init_cache(self.library_path)
@ -55,7 +47,7 @@ class ReadingTest(BaseTest):
                    '#tags':(),
                    '#yesno':None,
                    '#comments': None,
-
+                    'size':None,
                },

                2 : {
@ -66,7 +58,7 @@ class ReadingTest(BaseTest):
                    'series' : 'A Series One',
                    'series_index': 1.0,
                    'tags':('Tag One', 'Tag Two'),
-                    'formats': (),
+                    'formats': ('FMT1',),
                    'rating': 4.0,
                    'identifiers': {'test':'one'},
                    'timestamp': datetime.datetime(2011, 9, 5, 21, 6,
@ -86,6 +78,7 @@ class ReadingTest(BaseTest):
                    '#tags':('My Tag One', 'My Tag Two'),
                    '#yesno':True,
                    '#comments': '<div>My Comments One<p></p></div>',
+                    'size':9,
                },
                1  : {
                    'title': 'Title Two',
@ -96,7 +89,7 @@ class ReadingTest(BaseTest):
                    'series_index': 2.0,
                    'rating': 6.0,
                    'tags': ('Tag One', 'News'),
-                    'formats':(),
+                    'formats':('FMT1', 'FMT2'),
                    'identifiers': {'test':'two'},
                    'timestamp': datetime.datetime(2011, 9, 6, 6, 0,
                        tzinfo=utc_tz),
@ -115,6 +108,7 @@ class ReadingTest(BaseTest):
                    '#tags':('My Tag Two',),
                    '#yesno':False,
                    '#comments': '<div>My Comments Two<p></p></div>',
+                    'size':9,

                },
        }
@ -172,22 +166,41 @@ class ReadingTest(BaseTest):
        'Test get_metadata() returns the same data for both backends'
        from calibre.library.database2 import LibraryDatabase2
        old = LibraryDatabase2(self.library_path)
-        for i in xrange(1, 3):
-            old.add_format(i, 'txt%d'%i, StringIO(b'random%d'%i),
-                    index_is_id=True)
-            old.add_format(i, 'text%d'%i, StringIO(b'random%d'%i),
-                    index_is_id=True)
-
-        old_metadata = {i:old.get_metadata(i, index_is_id=True) for i in
+        old_metadata = {i:old.get_metadata(
+            i, index_is_id=True, get_cover=True, cover_as_data=True) for i in
                xrange(1, 4)}
+        for mi in old_metadata.itervalues():
+            mi.format_metadata = dict(mi.format_metadata)
+            if mi.formats:
+                mi.formats = tuple(mi.formats)
        old = None

        cache = self.init_cache(self.library_path)

-        new_metadata = {i:cache.get_metadata(i) for i in xrange(1, 4)}
+        new_metadata = {i:cache.get_metadata(
+            i, get_cover=True, cover_as_data=True) for i in xrange(1, 4)}
        cache = None
        for mi2, mi1 in zip(new_metadata.values(), old_metadata.values()):
            self.compare_metadata(mi1, mi2)
+    # }}}
+
+    def test_get_cover(self): # {{{
+        'Test cover() returns the same data for both backends'
+        from calibre.library.database2 import LibraryDatabase2
+        legacy = LibraryDatabase2(self.library_path)
+        expected = {}
+        for i in legacy.all_ids():
+            expected[i] = legacy.cover(i, index_is_id=True)
+        # Let go of the old backend before opening the library with the new one
+        legacy = None
+        cache = self.init_cache(self.library_path)
+        for book_id, cdata in expected.iteritems():
+            # Cover as raw data
+            self.assertEqual(cdata, cache.cover(book_id), 'Reading of cover failed')
+            # Cover as a file-like object; a falsy result is compared as-is
+            fobj = cache.cover(book_id, as_file=True)
+            from_file = fobj.read() if fobj else fobj
+            self.assertEqual(cdata, from_file, 'Reading of cover as file failed')
+            # Cover as a path on disk
+            cover_path = cache.cover(book_id, as_path=True)
+            if not cdata:
+                self.assertEqual(cdata, cover_path,
+                                 'Reading of null cover as path failed')
+                continue
+            with open(cover_path, 'rb') as src:
+                self.assertEqual(cdata, src.read(), 'Reading of cover as path failed')

    # }}}

@ -227,8 +240,12 @@ class ReadingTest(BaseTest):
            # User categories
            '@Good Authors:One', '@Good Series.good tags:two',

-            # TODO: Tests for searching the size and #formats columns and
-            # cover:true|false
+            # Cover/Formats
+            'cover:true', 'cover:false', 'formats:true', 'formats:false',
+            'formats:#>1', 'formats:#=1', 'formats:=fmt1', 'formats:=fmt2',
+            'formats:=fmt1 or formats:fmt2', '#formats:true', '#formats:false',
+            '#formats:fmt1', '#formats:fmt2', '#formats:fmt1 and #formats:fmt2',
+
        )}
        old = None

@ -247,9 +264,67 @@ class ReadingTest(BaseTest):
        old = LibraryDatabase2(self.library_path)
        old_categories = old.get_categories()
        cache = self.init_cache(self.library_path)
-        import pprint
-        pprint.pprint(old_categories)
-        pprint.pprint(cache.get_categories())
+        new_categories = cache.get_categories()
+        self.assertEqual(set(old_categories), set(new_categories),
+            'The set of old categories is not the same as the set of new categories')
+
+        def compare_category(category, old, new):
+            for attr in ('name', 'original_name', 'id', 'count',
+                         'is_hierarchical', 'is_editable', 'is_searchable',
+                         'id_set', 'avg_rating', 'sort', 'use_sort_as_name',
+                         'tooltip', 'icon', 'category'):
+                oval, nval = getattr(old, attr), getattr(new, attr)
+                if (
+                    (category in {'rating', '#rating'} and attr in {'id_set', 'sort'}) or
+                    (category == 'series' and attr == 'sort') or # Sorting is wrong in old
+                    (category == 'identifiers' and attr == 'id_set') or
+                    (category == '@Good Series') or # Sorting is wrong in old
+                    (category == 'news' and attr in {'count', 'id_set'}) or
+                    (category == 'formats' and attr == 'id_set')
+                ):
+                    continue
+                self.assertEqual(oval, nval,
+                    'The attribute %s for %s in category %s does not match. Old is %r, New is %r'
+                                %(attr, old.name, category, oval, nval))
+
+        for category in old_categories:
+            old, new = old_categories[category], new_categories[category]
+            self.assertEqual(len(old), len(new),
+                'The number of items in the category %s is not the same'%category)
+            for o, n in zip(old, new):
+                compare_category(category, o, n)
+
+    # }}}
+
+    def test_get_formats(self): # {{{
+        'Test reading ebook formats using the format() method'
+        from calibre.library.database2 import LibraryDatabase2
+        legacy = LibraryDatabase2(self.library_path)
+        # Map every book id to the set of format names the old backend lists
+        names = {}
+        for i in legacy.all_ids():
+            listing = legacy.formats(i, index_is_id=True)
+            names[i] = set(listing.split(',')) if listing else set()
+        # Map every book id to {format name: data returned by the old backend}
+        data = {}
+        for i, fmts in names.iteritems():
+            data[i] = {f:legacy.format(i, f, index_is_id=True) for f in fmts}
+        legacy = None
+        cache = self.init_cache(self.library_path)
+        for book_id, fmts in names.iteritems():
+            self.assertEqual(fmts, set(cache.formats(book_id)),
+                             'Set of formats is not the same')
+            for fmt in fmts:
+                expected = data[book_id][fmt]
+                self.assertEqual(expected, cache.format(book_id, fmt),
+                                 'Old and new format disagree')
+                fobj = cache.format(book_id, fmt, as_file=True)
+                self.assertEqual(expected, fobj.read(),
+                                 'Failed to read format as file')
+                named_copy = cache.format(book_id, fmt, as_path=True,
+                                          preserve_filename=True)
+                with open(named_copy, 'rb') as src:
+                    self.assertEqual(expected, src.read(),
+                                 'Failed to read format as path')
+                temp_copy = cache.format(book_id, fmt, as_path=True)
+                with open(temp_copy, 'rb') as src:
+                    self.assertEqual(expected, src.read(),
+                                 'Failed to read format as path')
+

    # }}}

--- a/src/calibre/db/tests/writing.py
+++ b/src/calibre/db/tests/writing.py
@ -0,0 +1,127 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import unittest
+from collections import namedtuple
+from functools import partial
+
+from calibre.utils.date import UNDEFINED_DATE
+from calibre.db.tests.base import BaseTest
+
+class WritingTest(BaseTest):
+    '''
+    Tests that values written through the new cache's set_field() agree with
+    what the old LibraryDatabase2 API writes and reads back.
+    '''
+
+    @property
+    def cloned_library(self):
+        'Path to a new copy of the test library (a new clone on every access)'
+        return self.clone_library(self.library_path)
+
+    def create_getter(self, name, getter=None):
+        '''
+        Return a function that takes an old-style db object and returns a
+        callable reading this field for a given book id.
+
+        :param name: Field name. When ``getter`` is None the first character
+                     is stripped to form the custom column label
+                     (presumably the leading ``#``)
+        :param getter: Name of the db method to read the field with, or None
+                       to use get_custom()/get_custom_extra()
+        '''
+        if getter is None:
+            if name.endswith('_index'):
+                # The index is read as the extra value of the custom column
+                # whose label is the name without '#' and '_index'
+                ans = lambda db:partial(db.get_custom_extra, index_is_id=True,
+                                        label=name[1:].replace('_index', ''))
+            else:
+                ans = lambda db:partial(db.get_custom, label=name[1:],
+                                       index_is_id=True)
+        else:
+            ans = lambda db:partial(getattr(db, getter), index_is_id=True)
+        return ans
+
+    def create_setter(self, name, setter=None):
+        '''
+        Return a function that takes an old-style db object and returns a
+        callable writing this field, always with commit=True.
+
+        :param setter: Name of the db method to write the field with, or None
+                       to use set_custom() with the label ``name[1:]``
+        '''
+        if setter is None:
+            ans = lambda db:partial(db.set_custom, label=name[1:], commit=True)
+        else:
+            ans = lambda db:partial(getattr(db, setter), commit=True)
+        return ans
+
+    def create_test(self, name, vals, getter=None, setter=None ):
+        '''
+        Bundle a field name, the values to try and the getter/setter
+        factories for the old backend into a ``Test`` namedtuple.
+        '''
+        T = namedtuple('Test', 'name vals getter setter')
+        return T(name, vals, self.create_getter(name, getter),
+                 self.create_setter(name, setter))
+
+    def run_tests(self, tests):
+        '''
+        For every test and every value: write the value to book 1 through
+        the new cache on a fresh clone of the library, then open the same
+        clone with the old backend and compare the results.
+        '''
+        # NOTE(review): results is filled with empty lists but never read
+        results = {}
+        for test in tests:
+            results[test] = []
+            for val in test.vals:
+                # Every value gets its own clone so writes cannot interfere
+                cl = self.cloned_library
+                cache = self.init_cache(cl)
+                cache.set_field(test.name, {1: val})
+                cached_res = cache.field_for(test.name, 1)
+                del cache
+                # What the old backend reads from the clone the new one wrote
+                db = self.init_old(cl)
+                getter = test.getter(db)
+                sqlite_res = getter(1)
+                if test.name.endswith('_index'):
+                    # Index fields are compared against the expected float,
+                    # with None expected to be stored as 1.0
+                    val = float(val) if val is not None else 1.0
+                    self.assertEqual(sqlite_res, val,
+                        'Failed setting for %s with value %r, sqlite value not the same. val: %r != sqlite_val: %r'%(
+                            test.name, val, val, sqlite_res))
+                else:
+                    # Write the same value with the old API and compare what
+                    # it reads back with what the new cache reported
+                    test.setter(db)(1, val)
+                    old_cached_res = getter(1)
+                    self.assertEqual(old_cached_res, cached_res,
+                                    'Failed setting for %s with value %r, cached value not the same. Old: %r != New: %r'%(
+                            test.name, val, old_cached_res, cached_res))
+                    # After refresh() compare against the value read before
+                    # the old API wrote anything
+                    db.refresh()
+                    old_sqlite_res = getter(1)
+                    self.assertEqual(old_sqlite_res, sqlite_res,
+                        'Failed setting for %s, sqlite value not the same: %r != %r'%(
+                            test.name, old_sqlite_res, sqlite_res))
+                del db
+
+    def test_one_one(self):
+        'Test setting of values in one-one fields'
+        tests = [self.create_test('#yesno', (True, False, 'true', 'false', None))]
+        # Numeric fields
+        for name, getter, setter in (
+            ('#series_index', None, None),
+            ('series_index', 'series_index', 'set_series_index'),
+            ('#float', None, None),
+        ):
+            vals = ['1.5', None, 0, 1.0]
+            tests.append(self.create_test(name, tuple(vals), getter, setter))
+
+        # Date fields
+        for name, getter, setter in (
+            ('pubdate', 'pubdate', 'set_pubdate'),
+            ('timestamp', 'timestamp', 'set_timestamp'),
+            ('#date', None, None),
+        ):
+            tests.append(self.create_test(
+                name, ('2011-1-12', UNDEFINED_DATE, None), getter, setter))
+
+        # Text fields
+        for name, getter, setter in (
+            ('title', 'title', 'set_title'),
+            ('uuid', 'uuid', 'set_uuid'),
+            ('author_sort', 'author_sort', 'set_author_sort'),
+            ('sort', 'title_sort', 'set_title_sort'),
+            ('#comments', None, None),
+            ('comments', 'comments', 'set_comment'),
+        ):
+            vals = ['something', None]
+            if name not in {'comments', '#comments'}:
+                # Setting text column to '' returns None in the new backend
+                # and '' in the old. I think None is more correct.
+                vals.append('')
+            if name == 'comments':
+                # Again new behavior of deleting comment rather than setting
+                # empty string is more correct.
+                vals.remove(None)
+            tests.append(self.create_test(name, tuple(vals), getter, setter))
+
+        self.run_tests(tests)
+
+def tests():
+    'Return a test suite built from all the tests in WritingTest'
+    loader = unittest.TestLoader()
+    return loader.loadTestsFromTestCase(WritingTest)
+
+def run():
+    'Run the suite returned by tests() with a verbose text runner'
+    runner = unittest.TextTestRunner(verbosity=2)
+    runner.run(tests())
+
+if __name__ == '__main__':
+    run()
+
+
--- a/src/calibre/db/write.py
+++ b/src/calibre/db/write.py
@ -0,0 +1,190 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from functools import partial
+from datetime import datetime
+
+from calibre.constants import preferred_encoding, ispy3
+from calibre.utils.date import (parse_only_date, parse_date, UNDEFINED_DATE,
+                                isoformat)
+if ispy3:
+    unicode = str
+
+# Convert data into values suitable for the db {{{
+
+def sqlite_datetime(x):
+    '''
+    Return datetime objects formatted with isoformat() using a space as the
+    separator; return anything else unchanged.
+    '''
+    if isinstance(x, datetime):
+        return isoformat(x, sep=' ')
+    return x
+
+def single_text(x):
+    '''
+    Adapt a value for a single-valued text field.
+
+    Non-unicode input is decoded with the preferred encoding (undecodable
+    bytes are replaced) and the text is stripped of surrounding whitespace.
+
+    :return: The stripped text, or None if ``x`` is None or the stripped
+             text is empty
+    '''
+    if x is None:
+        return None
+    text = x if isinstance(x, unicode) else x.decode(preferred_encoding, 'replace')
+    return text.strip() or None
+
+def multiple_text(sep, x):
+    '''
+    Adapt a value for a multi-valued text field.
+
+    :param sep: Separator used to split ``x`` when it is a string
+    :param x: None, a byte string, a unicode string or an iterable of strings
+    :return: A tuple of the non-empty items, each stripped and with internal
+             runs of whitespace collapsed to a single space. The tuple is
+             empty when ``x`` is None or contains no usable items.
+    '''
+    if x is None:
+        return ()
+    if isinstance(x, bytes):
+        x = x.decode(preferred_encoding, 'replace')
+    if isinstance(x, unicode):
+        x = x.split(sep)
+    stripped = (y.strip() for y in x if y.strip())
+    # Return a tuple rather than a generator: a generator is always truthy,
+    # which breaks callers that use ``adapter(x) or default``
+    return tuple(' '.join(y.split()) for y in stripped if y)
+
+def adapt_datetime(x):
+    '''
+    Parse strings with parse_date() (assume_utc=False, as_utc=False);
+    return any other value unchanged.
+    '''
+    if not isinstance(x, (unicode, bytes)):
+        return x
+    return parse_date(x, assume_utc=False, as_utc=False)
+
+def adapt_date(x):
+    '''
+    Parse strings with parse_only_date() and leave other values alone.
+    A value of None (given or produced by parsing) becomes UNDEFINED_DATE.
+    '''
+    parsed = parse_only_date(x) if isinstance(x, (unicode, bytes)) else x
+    return UNDEFINED_DATE if parsed is None else parsed
+
+def adapt_number(typ, x):
+    '''
+    Convert ``x`` with the callable ``typ`` (for example int or float).
+
+    :return: None when ``x`` is None or a string spelling "none" in any
+             case, otherwise ``typ(x)``
+    '''
+    if x is None:
+        return None
+    is_text = isinstance(x, (unicode, bytes))
+    if is_text and x.lower() == 'none':
+        return None
+    return typ(x)
+
+def adapt_bool(x):
+    '''
+    Adapt a value for a boolean field.
+
+    Strings are matched case-insensitively against "true", "false" and
+    "none"; any other string is converted with int() first. Non-string
+    values are converted with bool().
+
+    :return: True, False or None
+    '''
+    if isinstance(x, (unicode, bytes)):
+        lowered = x.lower()
+        keywords = {'true': True, 'false': False, 'none': None}
+        if lowered in keywords:
+            x = keywords[lowered]
+        else:
+            x = bool(int(lowered))
+    if x is None:
+        return None
+    return bool(x)
+
+def get_adapter(name, metadata):
+    '''
+    Return the callable that converts user supplied values for a field into
+    values suitable for the db.
+
+    :param name: Field name. title, author_sort, authors, timestamp,
+                 last_modified and series_index get a wrapper that
+                 substitutes a default for empty values
+    :param metadata: Field metadata. ``datatype`` selects the adapter; for
+                     text fields a truthy ``is_multiple`` must provide the
+                     ``ui_to_list`` separator
+    :return: A function of one argument
+    '''
+    dt = metadata['datatype']
+    if dt == 'text':
+        if metadata['is_multiple']:
+            ans = partial(multiple_text, metadata['is_multiple']['ui_to_list'])
+        else:
+            ans = single_text
+    elif dt == 'series':
+        ans = single_text
+    elif dt == 'datetime':
+        ans = adapt_date if name == 'pubdate' else adapt_datetime
+    elif dt == 'int':
+        ans = partial(adapt_number, int)
+    elif dt == 'float':
+        ans = partial(adapt_number, float)
+    elif dt == 'bool':
+        ans = adapt_bool
+    elif dt == 'comments':
+        ans = single_text
+    elif dt == 'rating':
+        # Ratings are clamped to the range [0, 10], None is passed through
+        ans = lambda x: x if x is None else min(10., max(0., adapt_number(float, x)))
+    elif dt == 'enumeration':
+        ans = single_text
+    elif dt == 'composite':
+        ans = lambda x: x
+
+    if name == 'title':
+        return lambda x: ans(x) or _('Unknown')
+    if name == 'author_sort':
+        return lambda x: ans(x) or ''
+    if name == 'authors':
+        return lambda x: ans(x) or (_('Unknown'),)
+    if name in {'timestamp', 'last_modified'}:
+        return lambda x: ans(x) or UNDEFINED_DATE
+    if name == 'series_index':
+        def series_index(x):
+            # Adapt only once; a missing index defaults to 1.0
+            val = ans(x)
+            return 1.0 if val is None else val
+        return series_index
+
+    return ans
+# }}}
+
+# One-One fields {{{
+def one_one_in_books(book_id_val_map, db, field, *args):
+    '''
+    Set a one-one field in the books table
+
+    Runs one UPDATE per book on ``db.conn`` and stores the new values in
+    ``field.table.book_col_map``.
+
+    :return: The set of book ids in ``book_id_val_map``
+    '''
+    if not book_id_val_map:
+        return set(book_id_val_map)
+    rows = tuple((sqlite_datetime(val), book_id)
+                 for book_id, val in book_id_val_map.iteritems())
+    sql = 'UPDATE books SET %s=? WHERE id=?'%field.metadata['column']
+    db.conn.executemany(sql, rows)
+    field.table.book_col_map.update(book_id_val_map)
+    return set(book_id_val_map)
+
+def one_one_in_other(book_id_val_map, db, field, *args):
+    '''
+    Set a one-one field in the non-books table, like comments
+
+    Books mapped to None have their row deleted and are dropped from
+    ``field.table.book_col_map``. All other books get their row inserted
+    or replaced and their cached value updated.
+
+    :return: The set of book ids in ``book_id_val_map``
+    '''
+    deleted = tuple((k,) for k, v in book_id_val_map.iteritems() if v is None)
+    if deleted:
+        db.conn.executemany('DELETE FROM %s WHERE book=?'%field.metadata['table'],
+                        deleted)
+        # Evict only the books whose rows were deleted, not every book in
+        # the map
+        for (book_id,) in deleted:
+            field.table.book_col_map.pop(book_id, None)
+    updated = {k:v for k, v in book_id_val_map.iteritems() if v is not None}
+    if updated:
+        db.conn.executemany('INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%(
+            field.metadata['table'], field.metadata['column']),
+            tuple((k, sqlite_datetime(v)) for k, v in updated.iteritems()))
+        field.table.book_col_map.update(updated)
+    return set(book_id_val_map)
+
+def custom_series_index(book_id_val_map, db, field, *args):
+    '''
+    Set the index stored alongside a custom series column.
+
+    :param book_id_val_map: Mapping of book id to the new index. None is
+                            stored as 1.0
+    :param db: Backend whose ``conn.executemany()`` runs the UPDATE
+    :param field: The index field. ``field.series_field.ids_for_book()``
+                  supplies the series ids of each book
+    :return: The set of book ids whose index was changed
+    '''
+    series_field = field.series_field
+    sequence = []
+    for book_id, sidx in book_id_val_map.iteritems():
+        if sidx is None:
+            sidx = 1.0
+        ids = series_field.ids_for_book(book_id)
+        # Books for which no series id is returned have no row to update
+        if ids:
+            sequence.append((sidx, book_id, ids[0]))
+            field.table.book_col_map[book_id] = sidx
+    if sequence:
+        db.conn.executemany('UPDATE %s SET %s=? WHERE book=? AND value=?'%(
+                field.metadata['table'], field.metadata['column']), sequence)
+    # Entries are (index, book_id, series_id): report the book ids, as the
+    # other writers do, not the index values
+    return {book_id for _sidx, book_id, _series_id in sequence}
+# }}}
+
+def dummy(book_id_val_map, *args):
+    'Writer that ignores its arguments and reports that no books changed'
+    nothing_changed = set()
+    return nothing_changed
+
+class Writer(object):
+    '''
+    Writes values for a single field.
+
+    ``adapter`` converts incoming values, ``accept_vals`` filters out values
+    that must not be written and ``set_books_func`` does the actual writing.
+    '''
+
+    def __init__(self, field):
+        self.adapter = get_adapter(field.name, field.metadata)
+        self.name = field.name
+        self.field = field
+        dt = field.metadata['datatype']
+        self.accept_vals = lambda x: True
+        if dt == 'composite' or field.name in {
+            'id', 'cover', 'size', 'path', 'formats', 'news'}:
+            # These fields are never written to through this class
+            self.set_books_func = dummy
+        elif self.name[0] == '#' and self.name.endswith('_index'):
+            self.set_books_func = custom_series_index
+        elif field.is_many:
+            # TODO: Implement this
+            # TODO: Remember to change commas to | when writing authors to sqlite
+            # Until then set_books() reports the missing support explicitly
+            self.set_books_func = None
+        else:
+            self.set_books_func = (one_one_in_books if field.metadata['table']
+                                   == 'books' else one_one_in_other)
+            if self.name in {'timestamp', 'uuid', 'sort'}:
+                # Falsy values are silently skipped for these fields
+                self.accept_vals = bool
+
+    def set_books(self, book_id_val_map, db):
+        '''
+        Adapt and write the values in ``book_id_val_map``.
+
+        :param book_id_val_map: Mapping of book id to the new value
+        :param db: Backend object handed on to the writing function
+        :return: The set returned by the writing function, or an empty set
+                 when no value was accepted
+        :raises NotImplementedError: If writing is not supported yet for
+                                     this kind of field
+        '''
+        book_id_val_map = {k:self.adapter(v) for k, v in
+                           book_id_val_map.iteritems() if self.accept_vals(v)}
+        if not book_id_val_map:
+            return set()
+        if self.set_books_func is None:
+            raise NotImplementedError(
+                'Writing to the field %s is not implemented yet'%self.name)
+        dirtied = self.set_books_func(book_id_val_map, db, self.field)
+        return dirtied
+
--- a/src/calibre/devices/iliad/driver.py
+++ b/src/calibre/devices/iliad/driver.py
@ -14,7 +14,7 @@ class ILIAD(USBMS):

    name           = 'IRex Iliad Device Interface'
    description    = _('Communicate with the IRex Iliad eBook reader.')
-    author         = _('John Schember')
+    author         = 'John Schember'
    supported_platforms = ['windows', 'linux']

    # Ordered list of supported formats
--- a/src/calibre/devices/irexdr/driver.py
+++ b/src/calibre/devices/irexdr/driver.py
@ -15,7 +15,7 @@ class IREXDR1000(USBMS):
    name           = 'IRex Digital Reader 1000 Device Interface'
    description    = _('Communicate with the IRex Digital Reader 1000 eBook ' \
        'reader.')
-    author         = _('John Schember')
+    author         = 'John Schember'
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -209,8 +209,9 @@ class ALURATEK_COLOR(USBMS):

    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'books'

-    VENDOR_NAME = ['USB_2.0', 'EZREADER']
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['USB_FLASH_DRIVER', '.']
+    VENDOR_NAME = ['USB_2.0', 'EZREADER', 'C4+']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['USB_FLASH_DRIVER', '.', 'TOUCH']
+    SCAN_FROM_ROOT = True

 class TREKSTOR(USBMS):

@ -225,6 +226,7 @@ class TREKSTOR(USBMS):

    VENDOR_ID   = [0x1e68]
    PRODUCT_ID  = [0x0041, 0x0042, 0x0052, 0x004e, 0x0056,
+            0x0067, # This is for the Pyrus Mini
            0x003e, # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091
            0x5cL, # This is for the 4ink http://www.mobileread.com/forums/showthread.php?t=191318
            ]
@ -234,7 +236,7 @@ class TREKSTOR(USBMS):

    VENDOR_NAME = 'TREKSTOR'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7',
-            'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS']
+            'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS', 'PYRUS_MINI']
    SUPPORTS_SUB_DIRS = True
    SUPPORTS_SUB_DIRS_DEFAULT = False

--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -40,7 +40,7 @@ class USBMS(CLI, Device):
    '''

    description    = _('Communicate with an eBook reader.')
-    author         = _('John Schember')
+    author         = 'John Schember'
    supported_platforms = ['windows', 'osx', 'linux']

    # Store type instances of BookList and Book. We must do this because
--- a/src/calibre/devices/usbobserver/Makefile
+++ b/src/calibre/devices/usbobserver/Makefile
@ -1,8 +0,0 @@
-usbobserver.so : usbobserver.o
-	gcc -arch i386 -arch ppc -bundle usbobserver.o -o usbobserver.so -framework Python -framework IOKit -framework CoreFoundation
-
-usbobserver.o : usbobserver.c
-	gcc -arch i386 -arch ppc -dynamic -I/Library/Frameworks/Python.framework/Versions/Current/Headers -c usbobserver.c -o usbobserver.o 
-
-clean : 
-	rm -f *.o *.so
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -67,6 +67,8 @@ def check_command_line_options(parser, args, log):
            ('-h' in args or '--help' in args):
        log.error('Cannot read from', input)
        raise SystemExit(1)
+    if input.endswith('.recipe') and not os.access(input, os.R_OK):
+        input = args[1]

    output = args[2]
    if (output.startswith('.') and output[:2] not in {'..', '.'} and '/' not in
--- a/src/calibre/ebooks/conversion/plugins/epub_input.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_input.py
@ -80,7 +80,7 @@ class EPUBInput(InputFormatPlugin):
        guide_cover, guide_elem = None, None
        for guide_elem in opf.iterguide():
            if guide_elem.get('type', '').lower() == 'cover':
-                guide_cover = guide_elem.get('href', '')
+                guide_cover = guide_elem.get('href', '').partition('#')[0]
                break
        if not guide_cover:
            return
@ -103,6 +103,12 @@ class EPUBInput(InputFormatPlugin):
        if not self.for_viewer:
            spine[0].getparent().remove(spine[0])
            removed = guide_cover
+        else:
+            # Ensure the cover is displayed as the first item in the book, some
+            # epub files have it set with linear='no' which causes the cover to
+            # display in the end
+            spine[0].attrib.pop('linear', None)
+            opf.spine[0].is_linear = True
        guide_elem.set('href', 'calibre_raster_cover.jpg')
        from calibre.ebooks.oeb.base import OPF
        t = etree.SubElement(elem[0].getparent(), OPF('item'),
--- a/src/calibre/ebooks/conversion/plugins/oeb_output.py
+++ b/src/calibre/ebooks/conversion/plugins/oeb_output.py
@ -82,8 +82,8 @@ class OEBOutput(OutputFormatPlugin):
                    self.log.warn('The cover image has an id != "cover". Renaming'
                            ' to work around bug in Nook Color')

-                    import uuid
-                    newid = str(uuid.uuid4())
+                    from calibre.ebooks.oeb.base import uuid_id
+                    newid = uuid_id()

                    for item in manifest_items_with_id('cover'):
                        item.set('id', newid)
--- a/src/calibre/ebooks/conversion/plugins/recipe_input.py
+++ b/src/calibre/ebooks/conversion/plugins/recipe_input.py
@ -68,10 +68,15 @@ class RecipeInput(InputFormatPlugin):
                recipe = compile_recipe(self.recipe_source)
                log('Using custom recipe')
            else:
-                from calibre.web.feeds.recipes.collection import \
-                        get_builtin_recipe_by_title
+                from calibre.web.feeds.recipes.collection import (
+                        get_builtin_recipe_by_title, get_builtin_recipe_titles)
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = os.path.basename(title).rpartition('.')[0]
+                titles = frozenset(get_builtin_recipe_titles())
+                if title not in titles:
+                    title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
+                    title = title.rpartition('.')[0]
+
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=not opts.dont_download_recipe)
                builtin = False
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -62,6 +62,26 @@ def wrap_lines(match):
    else:
        return ital+' '

+def smarten_punctuation(html, log):
+    from calibre.utils.smartypants import smartyPants
+    from calibre.ebooks.chardet import substitute_entites
+    from calibre.ebooks.conversion.utils import HeuristicProcessor
+    preprocessor = HeuristicProcessor(log=log)
+    from uuid import uuid4
+    start = 'calibre-smartypants-'+str(uuid4())
+    stop = 'calibre-smartypants-'+str(uuid4())
+    html = html.replace('<!--', start)
+    html = html.replace('-->', stop)
+    html = preprocessor.fix_nbsp_indents(html)
+    html = smartyPants(html)
+    html = html.replace(start, '<!--')
+    html = html.replace(stop, '-->')
+    # convert ellipsis to entities to prevent wrapping
+    html = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', html)
+    # convert double dashes to em-dash
+    html = re.sub(r'\s--\s', u'\u2014', html)
+    return substitute_entites(html)
+
 class DocAnalysis(object):
    '''
    Provides various text analysis functions to determine how the document is structured.
@ -638,7 +658,7 @@ class HTMLPreProcessor(object):
            html = preprocessor(html)

        if getattr(self.extra_opts, 'smarten_punctuation', False):
-            html = self.smarten_punctuation(html)
+            html = smarten_punctuation(html, self.log)

        try:
            unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
@ -653,23 +673,4 @@ class HTMLPreProcessor(object):

        return html

-    def smarten_punctuation(self, html):
-        from calibre.utils.smartypants import smartyPants
-        from calibre.ebooks.chardet import substitute_entites
-        from calibre.ebooks.conversion.utils import HeuristicProcessor
-        preprocessor = HeuristicProcessor(self.extra_opts, self.log)
-        from uuid import uuid4
-        start = 'calibre-smartypants-'+str(uuid4())
-        stop = 'calibre-smartypants-'+str(uuid4())
-        html = html.replace('<!--', start)
-        html = html.replace('-->', stop)
-        html = preprocessor.fix_nbsp_indents(html)
-        html = smartyPants(html)
-        html = html.replace(start, '<!--')
-        html = html.replace(stop, '-->')
-        # convert ellipsis to entities to prevent wrapping
-        html = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', html)
-        # convert double dashes to em-dash
-        html = re.sub(r'\s--\s', u'\u2014', html)
-        return substitute_entites(html)

--- a/src/calibre/ebooks/epub/fix/init.py
+++ b/src/calibre/ebooks/epub/fix/init.py
@ -1,67 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-
-from calibre.customize import Plugin
-
-class InvalidEpub(ValueError):
-    pass
-
-class ParseError(ValueError):
-
-    def __init__(self, name, desc):
-        self.name = name
-        self.desc = desc
-        ValueError.__init__(self,
-            _('Failed to parse: %(name)s with error: %(err)s')%dict(
-                name=name, err=desc))
-
-class ePubFixer(Plugin):
-
-    supported_platforms = ['windows', 'osx', 'linux']
-    author = 'Kovid Goyal'
-    type = _('ePub Fixer')
-    can_be_disabled = True
-
-    # API that subclasses must implement {{{
-    @property
-    def short_description(self):
-        raise NotImplementedError
-
-    @property
-    def long_description(self):
-        raise NotImplementedError
-
-    @property
-    def fix_name(self):
-        raise NotImplementedError
-
-    @property
-    def options(self):
-        '''
-        Return a list of 4-tuples
-        (option_name, type, default, help_text)
-        type is one of 'bool', 'int', 'string'
-        '''
-        return []
-
-    def run(self, container, opts, log, fix=False):
-        raise NotImplementedError
-    # }}}
-
-    def add_options_to_parser(self, parser):
-        parser.add_option('--' + self.fix_name.replace('_', '-'),
-                help=self.long_description, action='store_true', default=False)
-        for option in self.options:
-            action = 'store'
-            if option[1] == 'bool':
-                action = 'store_true'
-            kwargs = {'action': action, 'default':option[2], 'help':option[3]}
-            if option[1] != 'bool':
-                kwargs['type'] = option[1]
-            parser.add_option('--'+option[0].replace('_', '-'), **kwargs)
-
--- a/src/calibre/ebooks/epub/fix/container.py
+++ b/src/calibre/ebooks/epub/fix/container.py
@ -1,220 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import os, posixpath, urllib, sys, re
-
-from lxml import etree
-from lxml.etree import XMLSyntaxError
-
-from calibre.ebooks.epub.fix import InvalidEpub, ParseError
-from calibre import guess_type, prepare_string_for_xml
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.constants import iswindows
-from calibre.utils.zipfile import ZipFile, ZIP_STORED
-
-exists, join = os.path.exists, os.path.join
-
-OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
-OPF_NS = 'http://www.idpf.org/2007/opf'
-
-class Container(object):
-
-    META_INF = {
-            'container.xml' : True,
-            'manifest.xml' : False,
-            'encryption.xml' : False,
-            'metadata.xml' : False,
-            'signatures.xml' : False,
-            'rights.xml' : False,
-    }
-
-    def __init__(self, path, log):
-        self.root = os.path.abspath(path)
-        self.log = log
-        self.dirtied = set([])
-        self.cache = {}
-        self.mime_map = {}
-
-        if exists(join(self.root, 'mimetype')):
-            os.remove(join(self.root, 'mimetype'))
-
-        container_path = join(self.root, 'META-INF', 'container.xml')
-        if not exists(container_path):
-            raise InvalidEpub('No META-INF/container.xml in epub')
-        self.container = etree.fromstring(open(container_path, 'rb').read())
-        opf_files = self.container.xpath((
-            r'child::ocf:rootfiles/ocf:rootfile'
-            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
-            ), namespaces={'ocf':OCF_NS}
-        )
-        if not opf_files:
-            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
-        opf_path = os.path.join(self.root,
-                *opf_files[0].get('full-path').split('/'))
-        if not exists(opf_path):
-            raise InvalidEpub('OPF file does not exist at location pointed to'
-                    ' by META-INF/container.xml')
-
-        # Map of relative paths with / separators to absolute
-        # paths on filesystem with os separators
-        self.name_map = {}
-        for dirpath, dirnames, filenames in os.walk(self.root):
-            for f in filenames:
-                path = join(dirpath, f)
-                name = os.path.relpath(path, self.root).replace(os.sep, '/')
-                self.name_map[name] = path
-                if path == opf_path:
-                    self.opf_name = name
-                    self.mime_map[name] = guess_type('a.opf')[0]
-
-        for item in self.opf.xpath(
-                '//opf:manifest/opf:item[@href and @media-type]',
-                namespaces={'opf':OPF_NS}):
-            href = item.get('href')
-            self.mime_map[self.href_to_name(href,
-                posixpath.dirname(self.opf_name))] = item.get('media-type')
-
-    def manifest_worthy_names(self):
-        for name in self.name_map:
-            if name.endswith('.opf'): continue
-            if name.startswith('META-INF') and \
-                    posixpath.basename(name) in self.META_INF: continue
-            yield name
-
-    def delete_name(self, name):
-        self.mime_map.pop(name, None)
-        path = self.name_map[name]
-        os.remove(path)
-        self.name_map.pop(name)
-
-    def manifest_item_for_name(self, name):
-        href = self.name_to_href(name,
-            posixpath.dirname(self.opf_name))
-        q = prepare_string_for_xml(href, attribute=True)
-        existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
-                namespaces={'opf':OPF_NS})
-        if not existing:
-            return None
-        return existing[0]
-
-    def add_name_to_manifest(self, name, mt=None):
-        item = self.manifest_item_for_name(name)
-        if item is not None:
-            return
-        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
-        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
-                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
-                id=self.generate_manifest_id())
-        if not mt:
-            mt = guess_type(posixpath.basename(name))[0]
-        if not mt:
-            mt = 'application/octest-stream'
-        item.set('media-type', mt)
-        manifest.append(item)
-        self.fix_tail(item)
-
-    def fix_tail(self, item):
-        '''
-        Designed only to work with self closing elements after item has
-        just been inserted/appended
-        '''
-        parent = item.getparent()
-        idx = parent.index(item)
-        if idx == 0:
-            item.tail = parent.text
-        else:
-            item.tail = parent[idx-1].tail
-            if idx == len(parent)-1:
-                parent[idx-1].tail = parent.text
-
-    def generate_manifest_id(self):
-        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
-                namespaces={'opf':OPF_NS})
-        ids = set([x.get('id') for x in items])
-        for x in xrange(sys.maxint):
-            c = 'id%d'%x
-            if c not in ids:
-                return c
-
-    @property
-    def opf(self):
-        return self.get(self.opf_name)
-
-    def href_to_name(self, href, base=''):
-        href = urllib.unquote(href.partition('#')[0])
-        name = href
-        if base:
-            name = posixpath.join(base, href)
-        return name
-
-    def name_to_href(self, name, base):
-        if not base:
-            return name
-        return posixpath.relpath(name, base)
-
-    def get_raw(self, name):
-        path = self.name_map[name]
-        return open(path, 'rb').read()
-
-    def get(self, name):
-        if name in self.cache:
-            return self.cache[name]
-        raw = self.get_raw(name)
-        if name in self.mime_map:
-            try:
-                raw = self._parse(raw, self.mime_map[name])
-            except XMLSyntaxError as err:
-                raise ParseError(name, unicode(err))
-        self.cache[name] = raw
-        return raw
-
-    def set(self, name, val):
-        self.cache[name] = val
-        self.dirtied.add(name)
-
-    def _parse(self, raw, mimetype):
-        mt = mimetype.lower()
-        if mt.endswith('+xml'):
-            parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
-            raw = xml_to_unicode(raw,
-                strip_encoding_pats=True, assume_utf8=True,
-                resolve_entities=True)[0].strip()
-            idx = raw.find('<html')
-            if idx == -1:
-                idx = raw.find('<HTML')
-            if idx > -1:
-                pre = raw[:idx]
-                raw = raw[idx:]
-                if '<!DOCTYPE' in pre:
-                    user_entities = {}
-                    for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
-                        val = match.group(2)
-                        if val.startswith('"') and val.endswith('"'):
-                            val = val[1:-1]
-                        user_entities[match.group(1)] = val
-                    if user_entities:
-                        pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
-                        raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
-            return etree.fromstring(raw, parser=parser)
-        return raw
-
-    def write(self, path):
-        for name in self.dirtied:
-            data = self.cache[name]
-            raw = data
-            if hasattr(data, 'xpath'):
-                raw = etree.tostring(data, encoding='utf-8',
-                        xml_declaration=True)
-            with open(self.name_map[name], 'wb') as f:
-                f.write(raw)
-        self.dirtied.clear()
-        zf = ZipFile(path, 'w')
-        zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
-                compression=ZIP_STORED)
-        zf.add_dir(self.root)
-        zf.close()
-
--- a/src/calibre/ebooks/epub/fix/epubcheck.py
+++ b/src/calibre/ebooks/epub/fix/epubcheck.py
@ -1,91 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
-
-
-class Epubcheck(ePubFixer):
-
-    name = 'Workaround epubcheck bugs'
-
-    @property
-    def short_description(self):
-        return _('Workaround epubcheck bugs')
-
-    @property
-    def long_description(self):
-        return _('Workarounds for bugs in the latest release of epubcheck. '
-                'epubcheck reports many things as errors that are not '
-                'actually errors. epub-fix will try to detect these and replace '
-                'them with constructs that epubcheck likes. This may cause '
-                'significant changes to your epub, complain to the epubcheck '
-                'project.')
-
-    @property
-    def description(self):
-        return self.long_description
-
-    @property
-    def fix_name(self):
-        return 'epubcheck'
-
-    def fix_pubdates(self):
-        from calibre.utils.date import parse_date, strptime
-
-        dirtied = False
-        opf = self.container.opf
-        for dcdate in opf.xpath('//dc:date',
-                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
-            raw = dcdate.text
-            if not raw: raw = ''
-            default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
-            try:
-                ts = parse_date(raw, assume_utc=False, as_utc=True,
-                        default=default)
-            except:
-                raise InvalidEpub('Invalid date set in OPF', raw)
-            try:
-                sval = ts.strftime('%Y-%m-%d')
-            except:
-                from calibre import strftime
-                sval = strftime('%Y-%m-%d', ts.timetuple())
-            if sval != raw:
-                self.log.error(
-                    'OPF contains date', raw, 'that epubcheck does not like')
-                if self.fix:
-                    dcdate.text = sval
-                    self.log('\tReplaced', raw, 'with', sval)
-                    dirtied = True
-        if dirtied:
-            self.container.set(self.container.opf_name, opf)
-
-    def fix_preserve_aspect_ratio(self):
-        for name in self.container.name_map:
-            mt = self.container.mime_map.get(name, '')
-            if mt.lower() == 'application/xhtml+xml':
-                root = self.container.get(name)
-                dirtied = False
-                for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
-                        namespaces={'svg':'http://www.w3.org/2000/svg'}):
-                    self.log.error('Found <svg> element with'
-                            ' preserveAspectRatio="none" which epubcheck '
-                            'cannot handle')
-                    if self.fix:
-                        svg.set('preserveAspectRatio', 'xMidYMid meet')
-                        dirtied = True
-                        self.log('\tReplaced none with xMidYMid meet')
-                if dirtied:
-                    self.container.set(name, root)
-
-
-    def run(self, container, opts, log, fix=False):
-        self.container = container
-        self.opts = opts
-        self.log = log
-        self.fix = fix
-        self.fix_pubdates()
-        self.fix_preserve_aspect_ratio()
--- a/src/calibre/ebooks/epub/fix/main.py
+++ b/src/calibre/ebooks/epub/fix/main.py
@ -1,62 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import sys, os
-
-from calibre.utils.config import OptionParser
-from calibre.ptempfile import TemporaryDirectory
-from calibre import CurrentDir
-from calibre.utils.zipfile import ZipFile
-from calibre.utils.logging import default_log
-from calibre.customize.ui import epub_fixers
-from calibre.ebooks.epub.fix.container import Container
-from calibre.ebooks.epub.fix import ParseError
-
-
-def option_parser():
-    parser = OptionParser(usage=_(
-        '%prog [options] file.epub\n\n'
-        'Fix common problems in EPUB files that can cause them '
-        'to be rejected by poorly designed publishing services.\n\n'
-        'By default, no fixing is done and messages are printed out '
-        'for each error detected. Use the options to control which errors '
-        'are automatically fixed.'))
-    for fixer in epub_fixers():
-        fixer.add_options_to_parser(parser)
-
-    return parser
-
-
-def run(epub, opts, log):
-    with TemporaryDirectory('_epub-fix') as tdir:
-        with CurrentDir(tdir):
-            zf = ZipFile(epub)
-            zf.extractall()
-            zf.close()
-            container = Container(tdir, log)
-            for fixer in epub_fixers():
-                fix = getattr(opts, fixer.fix_name, False)
-                fixer.run(container, opts, log, fix=fix)
-            container.write(epub)
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        default_log.error(_('You must specify an epub file'))
-        return
-    epub = os.path.abspath(args[1])
-    try:
-        run(epub, opts, default_log)
-    except ParseError as err:
-        default_log.error(unicode(err))
-        raise SystemExit(1)
-
-if __name__ == '__main__':
-    main()
--- a/src/calibre/ebooks/epub/fix/unmanifested.py
+++ b/src/calibre/ebooks/epub/fix/unmanifested.py
@ -1,53 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-
-from calibre.ebooks.epub.fix import ePubFixer
-
-class Unmanifested(ePubFixer):
-
-    name = 'Fix unmanifested files'
-
-    @property
-    def short_description(self):
-        return _('Fix unmanifested files')
-
-    @property
-    def long_description(self):
-        return _('Fix unmanifested files. epub-fix can either add them to '
-        'the manifest or delete them as specified by the '
-        'delete unmanifested option.')
-
-    @property
-    def description(self):
-        return self.long_description
-
-    @property
-    def fix_name(self):
-        return 'unmanifested'
-
-    @property
-    def options(self):
-        return [('delete_unmanifested', 'bool', False,
-            _('Delete unmanifested files instead of adding them to the manifest'))]
-
-    def run(self, container, opts, log, fix=False):
-        dirtied = False
-        for name in list(container.manifest_worthy_names()):
-            item = container.manifest_item_for_name(name)
-            if item is None:
-                log.error(name, 'not in manifest')
-                if fix:
-                    if opts.delete_unmanifested:
-                        container.delete_name(name)
-                        log('\tDeleted')
-                    else:
-                        container.add_name_to_manifest(name)
-                        log('\tAdded to manifest')
-                        dirtied = True
-        if dirtied:
-            container.set(container.opf_name, container.opf)
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@ -249,6 +249,30 @@ def _write_new_cover(new_cdata, cpath):
    save_cover_data_to(new_cdata, new_cover.name)
    return new_cover

+def update_metadata(opf, mi, apply_null=False, update_timestamp=False):
+    for x in ('guide', 'toc', 'manifest', 'spine'):
+        setattr(mi, x, None)
+    if mi.languages:
+        langs = []
+        for lc in mi.languages:
+            lc2 = lang_as_iso639_1(lc)
+            if lc2: lc = lc2
+            langs.append(lc)
+        mi.languages = langs
+
+    opf.smart_update(mi)
+    if getattr(mi, 'uuid', None):
+        opf.application_id = mi.uuid
+    if apply_null:
+        if not getattr(mi, 'series', None):
+            opf.series = None
+        if not getattr(mi, 'tags', []):
+            opf.tags = []
+        if not getattr(mi, 'isbn', None):
+            opf.isbn = None
+    if update_timestamp and mi.timestamp is not None:
+        opf.timestamp = mi.timestamp
+
 def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
    stream.seek(0)
    reader = get_zip_reader(stream, root=os.getcwdu())
@ -279,29 +303,8 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
            import traceback
            traceback.print_exc()

-    for x in ('guide', 'toc', 'manifest', 'spine'):
-        setattr(mi, x, None)
-    if mi.languages:
-        langs = []
-        for lc in mi.languages:
-            lc2 = lang_as_iso639_1(lc)
-            if lc2: lc = lc2
-            langs.append(lc)
-        mi.languages = langs
-
-
-    reader.opf.smart_update(mi)
-    if getattr(mi, 'uuid', None):
-        reader.opf.application_id = mi.uuid
-    if apply_null:
-        if not getattr(mi, 'series', None):
-            reader.opf.series = None
-        if not getattr(mi, 'tags', []):
-            reader.opf.tags = []
-        if not getattr(mi, 'isbn', None):
-            reader.opf.isbn = None
-    if update_timestamp and mi.timestamp is not None:
-        reader.opf.timestamp = mi.timestamp
+    update_metadata(reader.opf, mi, apply_null=apply_null,
+                    update_timestamp=update_timestamp)

    newopf = StringIO(reader.opf.render())
    if isinstance(reader.archive, LocalZipFile):
--- a/src/calibre/ebooks/mobi/writer8/toc.py
+++ b/src/calibre/ebooks/mobi/writer8/toc.py
@ -60,7 +60,8 @@ class TOCAdder(object):
            else:
                oeb.guide.remove('toc')

-        if not self.has_toc or 'toc' in oeb.guide or opts.no_inline_toc:
+        if (not self.has_toc or 'toc' in oeb.guide or opts.no_inline_toc or
+            getattr(opts, 'mobi_passthrough', False)):
            return

        self.log('\tGenerating in-line ToC')
--- a/src/calibre/ebooks/oeb/display/webview.py
+++ b/src/calibre/ebooks/oeb/display/webview.py
@ -31,7 +31,8 @@ def self_closing_sub(match):
    return '<%s%s></%s>'%(match.group(1), match.group(2), match.group(1))

 def load_html(path, view, codec='utf-8', mime_type=None,
-        pre_load_callback=lambda x:None, path_is_html=False):
+              pre_load_callback=lambda x:None, path_is_html=False,
+              force_as_html=False):
    from PyQt4.Qt import QUrl, QByteArray
    if mime_type is None:
        mime_type = guess_type(path)[0]
@ -44,18 +45,20 @@ def load_html(path, view, codec='utf-8', mime_type=None,
            html = f.read().decode(codec, 'replace')

    html = EntityDeclarationProcessor(html).processed_html
-    has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None
-    self_closing_pat = re.compile(r'<\s*([A-Za-z1-6]+)([^>]*)/\s*>')
+    self_closing_pat = re.compile(r'<\s*([:A-Za-z0-9-]+)([^>]*)/\s*>')
    html = self_closing_pat.sub(self_closing_sub, html)

    loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)

-    if has_svg:
+    if force_as_html or re.search(r'<[:a-zA-Z0-9-]*svg', html) is None:
+        view.setHtml(html, loading_url)
+    else:
        view.setContent(QByteArray(html.encode(codec)), mime_type,
                loading_url)
-    else:
-        view.setHtml(html, loading_url)
-
-
+        mf = view.page().mainFrame()
+        elem = mf.findFirstElement('parsererror')
+        if not elem.isNull():
+            return False
+    return True

--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@ -7,12 +7,14 @@ __license__   = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, logging, sys, hashlib, uuid
+import os, logging, sys, hashlib, uuid, re
+from io import BytesIO
 from urllib import unquote as urlunquote, quote as urlquote
+from urlparse import urlparse

 from lxml import etree

-from calibre import guess_type, CurrentDir
+from calibre import guess_type as _guess_type, CurrentDir
 from calibre.customize.ui import (plugin_for_input_format,
        plugin_for_output_format)
 from calibre.ebooks.chardet import xml_to_unicode
@ -33,7 +35,10 @@ from calibre.utils.zipfile import ZipFile

 exists, join, relpath = os.path.exists, os.path.join, os.path.relpath

-OEB_FONTS = {guess_type('a.ttf')[0], guess_type('b.ttf')[0]}
+def guess_type(x):
+    return _guess_type(x)[0] or 'application/octet-stream'
+
+OEB_FONTS = {guess_type('a.ttf'), guess_type('b.ttf')}
 OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS}

 class Container(object):
@ -49,6 +54,9 @@ class Container(object):
          directory. They always contain POSIX separators and are unquoted. They
          can be thought of as canonical identifiers for files in the book.
          Most methods on the container object work with names.
+
+    When converting between hrefs and names use the methods provided by this
+    class, they assume all hrefs are quoted.
    '''

    book_type = 'oeb'
@ -72,12 +80,12 @@ class Container(object):
                path = join(dirpath, f)
                name = self.abspath_to_name(path)
                self.name_path_map[name] = path
-                self.mime_map[name] = guess_type(path)[0]
+                self.mime_map[name] = guess_type(path)
                # Special case if we have stumbled onto the opf
                if path == opfpath:
                    self.opf_name = name
                    self.opf_dir = os.path.dirname(path)
-                    self.mime_map[name] = guess_type('a.opf')[0]
+                    self.mime_map[name] = guess_type('a.opf')

        if not hasattr(self, 'opf_name'):
            raise InvalidBook('Book has no OPF file')
@ -93,16 +101,22 @@ class Container(object):
    def name_to_abspath(self, name):
        return os.path.abspath(join(self.root, *name.split('/')))

+    def exists(self, name):
+        return os.path.exists(self.name_to_abspath(name))
+
    def href_to_name(self, href, base=None):
        '''
        Convert an href (relative to base) to a name. base must be a name or
-        None, in which self.root is used.
+        None, in which case self.root is used.
        '''
        if base is None:
            base = self.root
        else:
            base = os.path.dirname(self.name_to_abspath(base))
-        href = urlunquote(href.partition('#')[0])
+        purl = urlparse(href)
+        if purl.scheme or not purl.path or purl.path.startswith('/'):
+            return None
+        href = urlunquote(purl.path)
        fullpath = os.path.join(base, *href.split('/'))
        return self.abspath_to_name(fullpath)

@ -195,7 +209,7 @@ class Container(object):
    def parsed(self, name):
        ans = self.parsed_cache.get(name, None)
        if ans is None:
-            mime = self.mime_map.get(name, guess_type(name)[0])
+            mime = self.mime_map.get(name, guess_type(name))
            ans = self.parse(self.name_path_map[name], mime)
            self.parsed_cache[name] = ans
        return ans
@ -205,10 +219,26 @@ class Container(object):
        return self.parsed(self.opf_name)

    @property
-    def spine_items(self):
-        manifest_id_map = {item.get('id'):self.href_to_name(item.get('href'), self.opf_name)
+    def mi(self):
+        from calibre.ebooks.metadata.opf2 import OPF as O
+        mi = self.serialize_item(self.opf_name)
+        return O(BytesIO(mi), basedir=self.opf_dir, unquote_urls=False,
+                populate_spine=False).to_book_metadata()
+
+    @property
+    def manifest_id_map(self):
+        return {item.get('id'):self.href_to_name(item.get('href'), self.opf_name)
            for item in self.opf_xpath('//opf:manifest/opf:item[@href and @id]')}

+    @property
+    def guide_type_map(self):
+        return {item.get('type', ''):self.href_to_name(item.get('href'), self.opf_name)
+            for item in self.opf_xpath('//opf:guide/opf:reference[@href and @type]')}
+
+    @property
+    def spine_items(self):
+        manifest_id_map = self.manifest_id_map
+
        linear, non_linear = [], []
        for item in self.opf_xpath('//opf:spine/opf:itemref[@idref]'):
            idref = item.get('idref')
@ -248,8 +278,8 @@ class Container(object):
                self.remove_from_xml(item)
                self.dirty(self.opf_name)

-        path = self.name_path_map.pop(name)
-        if os.path.exists(path):
+        path = self.name_path_map.pop(name, None)
+        if path and os.path.exists(path):
            os.remove(path)
        self.mime_map.pop(name, None)
        self.parsed_cache.pop(name, None)
@ -298,15 +328,24 @@ class Container(object):
            if idx == len(parent)-1:
                parent[idx-1].tail = parent.text

+    def opf_get_or_create(self, name):
+        ans = self.opf_xpath('//opf:'+name)
+        if ans:
+            return ans[0]
+        self.dirty(self.opf_name)
+        package = self.opf_xpath('//opf:package')[0]
+        item = package.makeelement(OPF(name))
+        item.tail = '\n'
+        package.append(item)
+        return item
+
    def generate_item(self, name, id_prefix=None, media_type=None):
        '''Add an item to the manifest with href derived from the given
        name. Ensures uniqueness of href and id automatically. Returns
        generated item.'''
        id_prefix = id_prefix or 'id'
-        media_type = media_type or guess_type(name)[0]
-        path = self.name_to_abspath(name)
-        relpath = self.relpath(path, base=self.opf_dir)
-        href = urlquote(relpath)
+        media_type = media_type or guess_type(name)
+        href = self.name_to_href(name, self.opf_name)
        base, ext = href.rpartition('.')[0::2]
        all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
        c = 0
@ -316,25 +355,75 @@ class Container(object):
            item_id = id_prefix + '%d'%c
        all_names = {x.get('href') for x in self.opf_xpath(
                '//opf:manifest/opf:item[@href]')}
+
+        def exists(h):
+            return self.exists(self.href_to_name(h, self.opf_name))
+
        c = 0
-        while href in all_names:
+        while href in all_names or exists(href):
            c += 1
            href = '%s_%d.%s'%(base, c, ext)
        manifest = self.opf_xpath('//opf:manifest')[0]
-        item = manifest.makeelement(OPF('item'), nsmap=OPF_NAMESPACES,
+        item = manifest.makeelement(OPF('item'),
                                    id=item_id, href=href)
        item.set('media-type', media_type)
        self.insert_into_xml(manifest, item)
        self.dirty(self.opf_name)
+        name = self.href_to_name(href, self.opf_name)
+        self.name_path_map[name] = self.name_to_abspath(name)
+        self.mime_map[name] = media_type
        return item

+    def format_opf(self):
+        ''' Tidy up the <metadata> section of the OPF in place: put every
+        child on its own indented line and drop calibre: <meta> tags whose
+        content is empty or just "{}". Does nothing if the OPF has no
+        <metadata> element. '''
+        found = self.opf_xpath('//opf:metadata')
+        if not found:
+            return
+        mdata = found[0]
+        mdata.text = '\n    '
+        remove = set()
+        for child in mdata:
+            child.tail = '\n    '
+            # Comments and processing instructions carry no attributes and
+            # their get() can return None instead of the supplied default,
+            # so normalize to a string before calling string methods.
+            name = child.get('name', '') or ''
+            content = child.get('content', '') or ''
+            if name.startswith('calibre:') and content.strip() in {'{}', ''}:
+                remove.add(child)
+        for child in remove:
+            mdata.remove(child)
+        if len(mdata) > 0:
+            # The closing </metadata> tag is indented one level less
+            mdata[-1].tail = '\n  '
+
+    def serialize_item(self, name):
+        ''' Return the result of serializing the parsed version of ``name``.
+        The OPF gets extra treatment: it is passed through format_opf()
+        before being serialized and has the opf: prefix removed from its tags
+        afterwards. '''
+        is_opf = name == self.opf_name
+        root = self.parsed(name)
+        if is_opf:
+            self.format_opf()
+        raw = serialize(root, self.mime_map[name])
+        if not is_opf:
+            return raw
+        # Needed as I can't get lxml to output opf:role and
+        # not output <opf:metadata> as well
+        return re.sub(br'(<[/]{0,1})opf:', r'\1', raw)
+
+    def commit_item(self, name):
+        ''' Serialize the parsed version of ``name`` and write it to the
+        file it maps to in name_path_map. The item is dropped from the parse
+        cache and is no longer considered dirty afterwards. Does nothing if
+        ``name`` is not in the parse cache. '''
+        if name not in self.parsed_cache:
+            return
+        data = self.serialize_item(name)
+        # An item can be in the parse cache without ever having been dirtied,
+        # committing it must not fail on the bookkeeping.
+        if name in self.dirtied:
+            self.dirtied.remove(name)
+        self.parsed_cache.pop(name)
+        with open(self.name_path_map[name], 'wb') as f:
+            f.write(data)
+
+    def open(self, name, mode='rb'):
+        ''' Return an open file object (opened with ``mode``) for the file
+        that name points to, for direct read/write. If the file is dirtied it
+        is committed first, and it is always dropped from the parse cache.
+        You must finish with the returned file before accessing the parsed
+        version of it again, or bad things will happen. Missing parent
+        directories are created. '''
+        if name in self.dirtied:
+            self.commit_item(name)
+        self.parsed_cache.pop(name, False)
+        dest = self.name_to_abspath(name)
+        parent_dir = os.path.dirname(dest)
+        if not os.path.exists(parent_dir):
+            os.makedirs(parent_dir)
+        return open(dest, mode)
+
    def commit(self, outpath=None):
        for name in tuple(self.dirtied):
-            self.dirtied.remove(name)
-            data = self.parsed_cache.pop(name)
-            data = serialize(data, self.mime_map[name])
-            with open(self.name_path_map[name], 'wb') as f:
-                f.write(data)
+            self.commit_item(name)

    def compare_to(self, other):
        if set(self.name_path_map) != set(other.name_path_map):
@ -390,7 +479,7 @@ class EpubContainer(Container):
        self.container = etree.fromstring(open(container_path, 'rb').read())
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
-            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
+            '[@media-type="%s" and @full-path]'%guess_type('a.opf')
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
@ -469,7 +558,7 @@ class EpubContainer(Container):
            outpath = self.pathtoepub
        from calibre.ebooks.tweak import zip_rebuilder
        with open(join(self.root, 'mimetype'), 'wb') as f:
-            f.write(guess_type('a.epub')[0])
+            f.write(guess_type('a.epub'))
        zip_rebuilder(self.root, outpath)

 # }}}
--- a/src/calibre/ebooks/oeb/polish/cover.py
+++ b/src/calibre/ebooks/oeb/polish/cover.py
@ -7,9 +7,11 @@ __license__   = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import shutil
+import shutil, re, os

-from calibre.ebooks.oeb.base import OPF
+from calibre.ebooks.oeb.base import OPF, OEB_DOCS, XPath, XLINK, xml2text
+from calibre.ebooks.oeb.polish.replace import replace_links
+from calibre.utils.magick.draw import identify

 def set_azw3_cover(container, cover_path, report):
    name = None
@ -26,11 +28,237 @@ def set_azw3_cover(container, cover_path, report):
    guide = container.opf_xpath('//opf:guide')[0]
    container.insert_into_xml(guide, guide.makeelement(
        OPF('reference'), href=href, type='cover'))
-    shutil.copyfile(cover_path, container.name_to_abspath(name))
+    with open(cover_path, 'rb') as src, container.open(name, 'wb') as dest:
+        shutil.copyfileobj(src, dest)
    container.dirty(container.opf_name)
    report('Cover updated' if found else 'Cover inserted')

 def set_cover(container, cover_path, report):
    if container.book_type == 'azw3':
        set_azw3_cover(container, cover_path, report)
+    else:
+        set_epub_cover(container, cover_path, report)
+
+###############################################################################
+# The delightful EPUB cover processing
+
+def is_raster_image(media_type):
+    ''' True if media_type (compared case insensitively) is a PNG, JPEG or
+    GIF mime type. A false media_type (None, empty string) is returned
+    unchanged. '''
+    if not media_type:
+        return media_type
+    return media_type.lower() in {
+        'image/png', 'image/jpeg', 'image/jpg', 'image/gif'}
+
+# Guide reference types (compared in lower case) that are treated as
+# pointing at a cover
+COVER_TYPES = {
+    'coverimagestandard', 'other.ms-coverimage-standard',
+    'other.ms-titleimage-standard', 'other.ms-titleimage',
+    'other.ms-coverimage', 'other.ms-thumbimage-standard',
+    'other.ms-thumbimage', 'thumbimagestandard', 'cover',
+}
+
+def find_cover_image(container):
+    ''' Find a raster image marked as a cover in the OPF. Returns its name
+    or None. The <meta name="cover"> tag wins over a guide reference of type
+    "cover", which wins over the other cover-like guide references. '''
+    manifest_id_map = container.manifest_id_map
+    mime_map = container.mime_map
+
+    # Highest priority: <meta name="cover" content="manifest-id"/>
+    for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
+        candidate = manifest_id_map.get(meta.get('content'), None)
+        if is_raster_image(mime_map.get(candidate, None)):
+            return candidate
+
+    # Only guide references that point at raster images are of interest
+    raster_refs = [
+        (ref_type.lower(), name)
+        for ref_type, name in container.guide_type_map.iteritems()
+        if is_raster_image(mime_map.get(name, None))]
+
+    # Next, a guide item with type == 'cover'
+    for ref_type, name in raster_refs:
+        if ref_type == 'cover':
+            return name
+
+    # Finally, the largest (by file size) image from all the remaining
+    # possible guide cover items
+    best_name, best_size = None, 0
+    for ref_type, name in raster_refs:
+        if ref_type not in COVER_TYPES:
+            continue
+        path = container.name_path_map.get(name, None)
+        if not path:
+            continue
+        size = os.path.getsize(path)
+        if size > best_size:
+            best_name, best_size = name, size
+
+    if best_name:
+        return best_name
+
+def find_cover_page(container):
+    ''' Find a document marked as a cover in the OPF, i.e. the target of a
+    guide reference of type "cover" whose mime type is in OEB_DOCS. Returns
+    its name or None. '''
+    mime_map = container.mime_map
+    cover_docs = (
+        name for ref_type, name in container.guide_type_map.iteritems()
+        if ref_type.lower() == 'cover' and
+        mime_map.get(name, '').lower() in OEB_DOCS)
+    return next(cover_docs, None)
+
+def find_cover_image_in_page(container, cover_page):
+    ''' If cover_page is a simple wrapper, that is a document with exactly
+    one <body>, no text and exactly one image (an <img> or an svg <image>),
+    return the name of that image. Otherwise return None. '''
+    bodies = XPath('//h:body')(container.parsed(cover_page))
+    if len(bodies) != 1:
+        return
+    body = bodies[0]
+    find_images = XPath(
+        'descendant::h:img[@src]|descendant::svg:svg/descendant::svg:image')
+    hrefs = (img.get('src') or img.get(XLINK('href'))
+             for img in find_images(body))
+    images = [container.href_to_name(href, base=cover_page)
+              for href in hrefs if href]
+    has_text = bool(re.sub(r'\s+', '', xml2text(body)))
+    if not has_text and len(images) == 1:
+        return images[0]
+    # Either there is no image at all or the document has more content than
+    # a single image
+    return None
+
+def clean_opf(container):
+    ''' Remove all references to covers from the OPF. This is a generator,
+    it yields the names of the files in the container that the removed
+    references pointed to. The OPF is marked dirty only once the generator
+    has been exhausted. '''
+    id_to_name = container.manifest_id_map
+    for cover_meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
+        target = id_to_name.get(cover_meta.get('content', None), None)
+        container.remove_from_xml(cover_meta)
+        if target and target in container.name_path_map:
+            yield target
+
+    type_to_name = container.guide_type_map
+    for ref in container.opf_xpath('//opf:guide/opf:reference[@type]'):
+        ref_type = ref.get('type', '')
+        if ref_type.lower() not in COVER_TYPES:
+            continue
+        container.remove_from_xml(ref)
+        target = type_to_name.get(ref_type, None)
+        if target and target in container.name_path_map:
+            yield target
+
+    container.dirty(container.opf_name)
+
+def create_epub_cover(container, cover_path):
+    ''' Add the image at cover_path to the book, together with a new
+    titlepage that displays it, and register both in the OPF: manifest,
+    spine (titlepage first), guide and the <meta name="cover"> tag.
+
+    Returns the tuple (raster_cover, titlepage) of the names of the two
+    newly created files. '''
+    from calibre.ebooks.conversion.config import load_defaults
+    from calibre.ebooks.oeb.transforms.cover import CoverManager
+
+    ext = cover_path.rpartition('.')[-1].lower()
+    raster_cover_item = container.generate_item('cover.'+ext, id_prefix='cover')
+    raster_cover = container.href_to_name(raster_cover_item.get('href'),
+                                          container.opf_name)
+    with open(cover_path, 'rb') as src, container.open(raster_cover, 'wb') as dest:
+        shutil.copyfileobj(src, dest)
+    opts = load_defaults('epub_output')
+    keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
+    no_svg = opts.get('no_svg_cover', False)
+    if no_svg:
+        # The % is doubled because the template is %-formatted below
+        style = 'style="height: 100%%"'
+        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
+    else:
+        width, height = 600, 800
+        try:
+            width, height = identify(cover_path)[:2]
+        except Exception:
+            # Best effort, fall back to the default dimensions above
+            container.log.exception("Failed to get width and height of cover")
+        ar = 'xMidYMid meet' if keep_aspect else 'none'
+        templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
+        templ = templ.replace('__viewbox__', '0 0 %d %d'%(width, height))
+        templ = templ.replace('__width__',  str(width))
+        templ = templ.replace('__height__', str(height))
+    titlepage_item = container.generate_item('titlepage.xhtml',
+                                             id_prefix='titlepage')
+    titlepage = container.href_to_name(titlepage_item.get('href'),
+                                          container.opf_name)
+    # The link to the image lives inside the titlepage, so it has to be
+    # relative to the titlepage
+    raw = templ%container.name_to_href(raster_cover, titlepage).encode('utf-8')
+    with container.open(titlepage, 'wb') as f:
+        f.write(raw)
+
+    # We have to make sure the raster cover item has id="cover" for the moron
+    # that wrote the Nook firmware
+    if raster_cover_item.get('id') != 'cover':
+        from calibre.ebooks.oeb.base import uuid_id
+        newid = uuid_id()
+        for item in container.opf_xpath('//*[@id="cover"]'):
+            item.set('id', newid)
+        for item in container.opf_xpath('//*[@idref="cover"]'):
+            item.set('idref', newid)
+        raster_cover_item.set('id', 'cover')
+
+    spine = container.opf_xpath('//opf:spine')[0]
+    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
+    container.insert_into_xml(spine, ref, index=0)
+    guide = container.opf_get_or_create('guide')
+    # Links inside the OPF are relative to the OPF, same as the manifest
+    # hrefs created by generate_item()
+    container.insert_into_xml(guide, guide.makeelement(
+        OPF('reference'), type='cover', title=_('Cover'),
+        href=container.name_to_href(titlepage, container.opf_name)))
+    metadata = container.opf_get_or_create('metadata')
+    meta = metadata.makeelement(OPF('meta'), name='cover')
+    meta.set('content', raster_cover_item.get('id'))
+    container.insert_into_xml(metadata, meta)
+
+    return raster_cover, titlepage
+
+def remove_cover_image_in_page(container, page, cover_images):
+    ''' If the first <img> in page points to one of the names in
+    cover_images, remove that <img> from the page and mark the page dirty.
+    Only the first image in the page is ever considered. '''
+    for img in container.parsed(page).xpath('//*[local-name()="img" and @src]'):
+        href = img.get('src')
+        name = container.href_to_name(href, page)
+        if name in cover_images:
+            # getparent is a method and has to be called to get the parent
+            img.getparent().remove(img)
+            # Without this the change is never written out by commit()
+            container.dirty(page)
+        break
+
+def set_epub_cover(container, cover_path, report):
+    ''' Make the image at cover_path the cover of the EPUB in container.
+
+    All references to covers are removed from the OPF. An existing cover
+    page that is a simple wrapper around a single image is removed together
+    with that image, as is the image marked as the cover in the OPF. A new
+    cover image and titlepage are then created and links to the removed
+    files are pointed at the new ones. report is called with either
+    'Cover updated' or 'Cover inserted'. '''
+    cover_image = find_cover_image(container)
+    cover_page = find_cover_page(container)
+    wrapped_image = extra_cover_page = None
+    updated = False
+    log = container.log
+
+    # clean_opf() is a generator, it has to be consumed for the cover
+    # references to actually be removed from the OPF
+    possible_removals = set(clean_opf(container))
+    possible_removals
+    # TODO: Handle possible_removals and also iterate over links in the removed
+    # pages and handle possibly removing stylesheets referred to by them.
+
+    spine_items = tuple(container.spine_items)
+    if cover_page is None and spine_items:
+        # Check if the first item in the spine is a simple cover wrapper
+        # (there is nothing to check in a book with an empty spine)
+        candidate = container.abspath_to_name(spine_items[0])
+        if find_cover_image_in_page(container, candidate) is not None:
+            cover_page = candidate
+
+    if cover_page is not None:
+        log('Found existing cover page')
+        wrapped_image = find_cover_image_in_page(container, cover_page)
+
+        if len(spine_items) > 1:
+            # Look for an extra cover page
+            c = container.abspath_to_name(spine_items[1])
+            if c != cover_page:
+                candidate = find_cover_image_in_page(container, c)
+                if candidate and candidate in {wrapped_image, cover_image}:
+                    log('Found an extra cover page that is a simple wrapper, removing it')
+                    # This page has only a single image and that image is the
+                    # cover image, remove it.
+                    container.remove_item(c)
+                    extra_cover_page = c
+                    spine_items = spine_items[:1] + spine_items[2:]
+                elif candidate is None:
+                    # Remove the cover image if it is the first image in this
+                    # page
+                    remove_cover_image_in_page(container, c, {wrapped_image,
+                                                          cover_image})
+
+        if wrapped_image is not None:
+            # The cover page is a simple wrapper around a single cover image,
+            # we can remove it safely.
+            log('Existing cover page is a simple wrapper, removing it')
+            container.remove_item(cover_page)
+            container.remove_item(wrapped_image)
+            updated = True
+
+    if cover_image and cover_image != wrapped_image:
+        # Remove the old cover image
+        container.remove_item(cover_image)
+
+    # Insert the new cover
+    raster_cover, titlepage = create_epub_cover(container, cover_path)
+
+    report('Cover updated' if updated else 'Cover inserted')
+
+    # Replace links to the old cover image/cover page. Whatever was not
+    # found is None and is dropped from the map.
+    link_sub = {s:d for s, d in {
+        cover_page:titlepage, wrapped_image:raster_cover,
+        cover_image:raster_cover, extra_cover_page:titlepage}.iteritems()
+        if s is not None}
+    if link_sub:
+        replace_links(container, link_sub, frag_map=lambda x, y:None)
+

--- a/src/calibre/ebooks/oeb/polish/jacket.py
+++ b/src/calibre/ebooks/oeb/polish/jacket.py
@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.customize.ui import output_profiles
+from calibre.ebooks.conversion.config import load_defaults
+from calibre.ebooks.oeb.base import XPath, OPF
+from calibre.ebooks.oeb.polish.cover import find_cover_page
+from calibre.ebooks.oeb.transforms.jacket import render_jacket as render
+
+def render_jacket(mi):
+    ''' Render a jacket for the metadata mi with the output profile named in
+    the saved page_setup defaults, falling back to the 'default' profile when
+    that name is unknown. '''
+    wanted = load_defaults('page_setup').get('output_profile', 'default')
+    profiles = {p.short_name:p for p in output_profiles()}
+    fallback = profiles['default']
+    return render(mi, profiles.get(wanted, fallback))
+
+def is_legacy_jacket(root):
+    ''' True if root contains an <h1> or <h2> whose class starts with
+    "calibrerescale". '''
+    headings = root.xpath(
+        '//*[starts-with(@class,"calibrerescale") and (local-name()="h1" or local-name()="h2")]')
+    return bool(headings)
+
+def is_current_jacket(root):
+    ''' True if root contains a <meta name="calibre-content"
+    content="jacket"> tag. '''
+    find_marker = XPath(
+        '//h:meta[@name="calibre-content" and @content="jacket"]')
+    return bool(find_marker(root))
+
+def find_existing_jacket(container):
+    ''' Return the name of the first spine item that is a jacket, or None.
+    In AZW3 books every spine item is checked for the current jacket marker.
+    In other books only files named jacket*.xhtml are checked, for either
+    the current or the legacy jacket markup. '''
+    for item in container.spine_items:
+        name = container.abspath_to_name(item)
+        if container.book_type == 'azw3':
+            detectors = (is_current_jacket,)
+        elif name.rpartition('/')[-1].startswith('jacket') and name.endswith('.xhtml'):
+            detectors = (is_current_jacket, is_legacy_jacket)
+        else:
+            continue
+        root = container.parsed(name)
+        if any(detect(root) for detect in detectors):
+            return name
+
+def replace_jacket(container, name):
+    ''' Make a freshly rendered jacket (from container.mi) the parsed
+    version of name and mark name as dirty. '''
+    container.parsed_cache[name] = render_jacket(container.mi)
+    container.dirty(name)
+
+def remove_jacket(container):
+    ''' Remove the existing jacket, if any, from the container. Returns
+    True if a jacket was found and removed, False otherwise. '''
+    name = find_existing_jacket(container)
+    if name is None:
+        return False
+    container.remove_item(name)
+    return True
+
+def add_or_replace_jacket(container):
+    ''' Replace the existing jacket with a freshly rendered one, or, if the
+    book has no jacket, create one and insert it at the start of the spine
+    (after the cover page, if the spine starts with it).
+
+    Returns True if an existing jacket was replaced and False if a new
+    jacket was inserted. '''
+    name = find_existing_jacket(container)
+    found = name is not None
+    if not found:
+        jacket_item = container.generate_item('jacket.xhtml', id_prefix='jacket')
+        name = container.href_to_name(jacket_item.get('href'), container.opf_name)
+    replace_jacket(container, name)
+    if not found:
+        # Insert new jacket into spine
+        index = 0
+        # The spine may be empty, in which case there is no first item and
+        # the jacket simply becomes the first one
+        first = next(iter(container.spine_items), None)
+        if (first is not None and
+                container.abspath_to_name(first) == find_cover_page(container)):
+            index = 1
+        itemref = container.opf.makeelement(OPF('itemref'),
+                                            idref=jacket_item.get('id'))
+        container.insert_into_xml(container.opf_xpath('//opf:spine')[0], itemref,
+                              index=index)
+    return found
+
--- a/src/calibre/ebooks/oeb/polish/main.py
+++ b/src/calibre/ebooks/oeb/polish/main.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re
+import re, sys, os, time
 from collections import namedtuple
 from functools import partial

@ -15,12 +15,18 @@ from calibre.ebooks.oeb.polish.container import get_container
 from calibre.ebooks.oeb.polish.stats import StatsCollector
 from calibre.ebooks.oeb.polish.subset import subset_all_fonts
 from calibre.ebooks.oeb.polish.cover import set_cover
+from calibre.ebooks.oeb.polish.replace import smarten_punctuation
+from calibre.ebooks.oeb.polish.jacket import (
+    replace_jacket, add_or_replace_jacket, find_existing_jacket, remove_jacket)
 from calibre.utils.logging import Log

 ALL_OPTS = {
    'subset': False,
    'opf': None,
    'cover': None,
+    'jacket': False,
+    'remove_jacket':False,
+    'smarten_punctuation':False,
 }

 SUPPORTED = {'EPUB', 'AZW3'}
@ -38,8 +44,8 @@ changes needed for the desired effect.</p>

 <p>You should use this tool as the last step in your ebook creation process.</p>

-<p>Note that polishing only works on files in the <b>%s</b> formats.</p>
-''')%_(' or ').join(SUPPORTED),
+<p>Note that polishing only works on files in the %s formats.</p>
+''')%_(' or ').join('<b>%s</b>'%x for x in SUPPORTED),

 'subset': _('''\
 <p>Subsetting fonts means reducing an embedded font to contain
@ -59,6 +65,22 @@ characters or completely removed.</p>
 date you decide to add more text to your books, the newly added
 text might not be covered by the subset font.</p>
 '''),
+
+'jacket': _('''\
+<p>Insert a "book jacket" page at the start of the book that contains
+all the book metadata such as title, tags, authors, series, comments,
+etc.</p>'''),
+
+'remove_jacket': _('''\
+<p>Remove a previous inserted book jacket page.</p>
+'''),
+
+'smarten_punctuation': _('''\
+<p>Convert plain text dashes, ellipsis, quotes, multiple hyphens, etc. into their
+typographically correct equivalents.</p>
+<p>Note that the algorithm can sometimes generate incorrect results, especially
+when single quotes at the start of contractions are involved.</p>
+'''),
 }

 def hfix(name, raw):
@ -72,40 +94,100 @@ def hfix(name, raw):
 CLI_HELP = {x:hfix(x, re.sub('<.*?>', '', y)) for x, y in HELP.iteritems()}
 # }}}

+def update_metadata(ebook, new_opf):
+    ''' Update the metadata in the OPF of ebook from the OPF file at the
+    path new_opf. Cover information in new_opf is discarded. The OPF of the
+    book is rewritten in place through ebook.open(). '''
+    from calibre.ebooks.metadata.opf2 import OPF
+    from calibre.ebooks.metadata.epub import update_metadata as apply_metadata
+    basedir = os.path.dirname(ebook.name_to_abspath(ebook.opf_name))
+    with ebook.open(ebook.opf_name, 'r+b') as stream, open(new_opf, 'rb') as ns:
+        opf = OPF(stream, basedir=basedir, populate_spine=False,
+                  unquote_urls=False)
+        new_metadata = OPF(ns, unquote_urls=False, populate_spine=False)
+        mi = new_metadata.to_book_metadata()
+        mi.cover, mi.cover_data = None, (None, None)
+
+        apply_metadata(opf, mi, apply_null=True, update_timestamp=True)
+        # Overwrite the old OPF with the updated one
+        stream.seek(0)
+        stream.truncate()
+        stream.write(opf.render())
+
 def polish(file_map, opts, log, report):
    rt = lambda x: report('\n### ' + x)
+    st = time.time()
    for inbook, outbook in file_map.iteritems():
-        report('Polishing: %s'%(inbook.rpartition('.')[-1].upper()))
+        report(_('## Polishing: %s')%(inbook.rpartition('.')[-1].upper()))
        ebook = get_container(inbook, log)
+        jacket = None

        if opts.subset:
            stats = StatsCollector(ebook)

-        if opts.subset:
-            rt('Subsetting embedded fonts')
-            subset_all_fonts(ebook, stats.font_stats, report)
-            report('')
+        if opts.opf:
+            rt(_('Updating metadata'))
+            update_metadata(ebook, opts.opf)
+            jacket = find_existing_jacket(ebook)
+            if jacket is not None:
+                replace_jacket(ebook, jacket)
+                report(_('Updated metadata jacket'))
+            report(_('Metadata updated\n'))

        if opts.cover:
-            rt('Setting cover')
+            rt(_('Setting cover'))
            set_cover(ebook, opts.cover, report)
            report('')

+        if opts.jacket:
+            rt(_('Inserting metadata jacket'))
+            if jacket is None:
+                if add_or_replace_jacket(ebook):
+                    report(_('Existing metadata jacket replaced'))
+                else:
+                    report(_('Metadata jacket inserted'))
+            else:
+                report(_('Existing metadata jacket replaced'))
+            report('')
+
+        if opts.remove_jacket:
+            rt(_('Removing metadata jacket'))
+            if remove_jacket(ebook):
+                report(_('Metadata jacket removed'))
+            else:
+                report(_('No metadata jacket found'))
+            report('')
+
+        if opts.smarten_punctuation:
+            rt(_('Smartening punctuation'))
+            smarten_punctuation(ebook, report)
+            report('')
+
+        if opts.subset:
+            rt(_('Subsetting embedded fonts'))
+            subset_all_fonts(ebook, stats.font_stats, report)
+            report('')
+
        ebook.commit(outbook)
+        report('-'*70)
+    report(_('Polishing took: %.1f seconds')%(time.time()-st))
+
+# Separator line logged before the polishing report
+REPORT = ' REPORT '.join(['-'*30]*2)

 def gui_polish(data):
    files = data.pop('files')
+    if not data.pop('metadata'):
+        data.pop('opf')
+        data.pop('cover')
    file_map = {x:x for x in files}
    opts = ALL_OPTS.copy()
    opts.update(data)
-    O = namedtuple('Options', ' '.join(data.iterkeys()))
+    O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys()))
    opts = O(**opts)
    log = Log(level=Log.DEBUG)
    report = []
    polish(file_map, opts, log, report.append)
-    log('\n', '-'*30, ' REPORT ', '-'*30)
+    log('')
+    log(REPORT)
    for msg in report:
        log(msg)
+    return '\n\n'.join(report)

 def option_parser():
    from calibre.utils.config import OptionParser
@ -115,16 +197,22 @@ def option_parser():
    a = parser.add_option
    o = partial(a, default=False, action='store_true')
    o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset'])
-    a('--cover', help=_(
+    a('--cover', '-c', help=_(
        'Path to a cover image. Changes the cover specified in the ebook. '
-        'If no cover is present, inserts a new cover.'))
+        'If no cover is present, or the cover is not properly identified, inserts a new cover.'))
+    a('--opf', '-o', help=_(
+        'Path to an OPF file. The metadata in the book is updated from the OPF file.'))
+    o('--jacket', '-j', help=CLI_HELP['jacket'])
+    o('--remove-jacket', help=CLI_HELP['remove_jacket'])
+    o('--smarten-punctuation', '-p', help=CLI_HELP['smarten_punctuation'])
+
    o('--verbose', help=_('Produce more verbose output, useful for debugging.'))

    return parser

-def main():
+def main(args=None):
    parser = option_parser()
-    opts, args = parser.parse_args()
+    opts, args = parser.parse_args(args or sys.argv[1:])
    log = Log(level=Log.DEBUG if opts.verbose else Log.INFO)
    if not args:
        parser.print_help()
@ -148,19 +236,14 @@ def main():
    O = namedtuple('Options', ' '.join(popts.iterkeys()))
    popts = O(**popts)
    report = []
-    something = False
-    for name in ALL_OPTS:
-        if name not in {'opf', }:
-            if getattr(popts, name):
-                something = True
-
-    if not something:
+    if not tuple(filter(None, (getattr(popts, name) for name in ALL_OPTS))):
        parser.print_help()
        log.error(_('You must specify at least one action to perform'))
        raise SystemExit(1)

    polish({inbook:outbook}, popts, log, report.append)
-    log('\n', '-'*30, ' REPORT ', '-'*30)
+    log('')
+    log(REPORT)
    for msg in report:
        log(msg)

--- a/src/calibre/ebooks/oeb/polish/replace.py
+++ b/src/calibre/ebooks/oeb/polish/replace.py
@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import codecs
+from urlparse import urlparse
+
+from cssutils import replaceUrls
+
+from calibre.ebooks.chardet import strip_encoding_declarations
+from calibre.ebooks.oeb.polish.container import guess_type
+from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, rewrite_links)
+
+class LinkReplacer(object):
+    ''' Callable that maps a URL found in the file named base to its
+    replacement, according to link_map (old name -> new name). Fragments
+    are passed through frag_map(old name, fragment), a false result drops
+    the fragment. self.replaced becomes True once any URL was changed. '''
+
+    def __init__(self, base, container, link_map, frag_map):
+        self.base, self.container = base, container
+        self.link_map, self.frag_map = link_map, frag_map
+        self.replaced = False
+
+    def __call__(self, url):
+        old_name = self.container.href_to_name(url, self.base)
+        new_name = self.link_map.get(old_name, None) if old_name else None
+        if not new_name:
+            # Not a link into the container or not a link we care about
+            return url
+        fragment = urlparse(url).fragment
+        href = self.container.name_to_href(new_name, self.base)
+        if fragment:
+            new_fragment = self.frag_map(old_name, fragment)
+            if new_fragment:
+                href += '#%s'%new_fragment
+        if href != url:
+            self.replaced = True
+        return href
+
+def replace_links(container, link_map, frag_map=lambda name, frag:frag):
+    ''' Replace links in the documents, stylesheets and NCX files of the
+    container according to link_map, which maps old names to new names.
+    frag_map(name, fragment) gives the new fragment for a replaced link, by
+    default fragments are kept. Every file in which a link actually changed
+    is marked dirty. '''
+    ncx_type = guess_type('toc.ncx')
+    for name, media_type in container.mime_map.iteritems():
+        repl = LinkReplacer(name, container, link_map, frag_map)
+        mt = media_type.lower()
+        if mt in OEB_DOCS:
+            rewrite_links(container.parsed(name), repl)
+        elif mt in OEB_STYLES:
+            replaceUrls(container.parsed(name), repl)
+        elif mt == ncx_type:
+            for elem in container.parsed(name).xpath('//*[@src]'):
+                old_src = elem.get('src')
+                new_src = repl(old_src)
+                if new_src != old_src:
+                    elem.set('src', new_src)
+
+        if repl.replaced:
+            container.dirty(name)
+
+def smarten_punctuation(container, report):
+    ''' Smarten the punctuation in every spine item of the container. Files
+    that change are rewritten as UTF-8 with a BOM and without encoding
+    declarations, have their <meta http-equiv> tags removed, are marked
+    dirty and are announced through report. '''
+    from calibre.ebooks.conversion.preprocess import (
+        smarten_punctuation as smarten)
+    for path in container.spine_items:
+        name = container.abspath_to_name(path)
+        with container.open(name, 'r+b') as f:
+            html = container.decode(f.read())
+            newhtml = smarten(html, container.log)
+            changed = newhtml != html
+            if changed:
+                report(_('Smartened punctuation in: %s')%name)
+                newhtml = strip_encoding_declarations(newhtml)
+                f.seek(0)
+                f.truncate()
+                f.write(codecs.BOM_UTF8 + newhtml.encode('utf-8'))
+        if not changed:
+            continue
+        # Add an encoding declaration (it will be added automatically when
+        # serialized)
+        root = container.parsed(name)
+        for m in root.xpath('descendant::*[local-name()="meta" and @http-equiv]'):
+            m.getparent().remove(m)
+        container.dirty(name)
+
--- a/src/calibre/ebooks/oeb/polish/stats.py
+++ b/src/calibre/ebooks/oeb/polish/stats.py
@ -61,9 +61,8 @@ def get_matching_rules(rules, font):
    # Filter on font stretch
    width = widths[font.get('font-stretch', 'normal')]

-    min_dist = min(abs(width-f['width']) for f in matches)
-    nearest = [f for f in matches if abs(width-f['width']) ==
-        min_dist]
+    min_dist = min(abs(width-y['width']) for y in matches)
+    nearest = [x for x in matches if abs(width-x['width']) == min_dist]
    if width <= 4:
        lmatches = [f for f in nearest if f['width'] <= width]
    else:
@ -108,6 +107,8 @@ class Page(QWebPage): # {{{
        self.js = None
        self.evaljs = self.mainFrame().evaluateJavaScript
        self.bridge_value = None
+        nam = self.networkAccessManager()
+        nam.setNetworkAccessible(nam.NotAccessible)

    def javaScriptConsoleMessage(self, msg, lineno, msgid):
        self.log(u'JS:', unicode(msg))
@ -199,6 +200,22 @@ class StatsCollector(object):

        self.render_book()

+    def href_to_name(self, href, warn_name):
+        ''' Convert a file:// URL into the name of the corresponding file in
+        the container. Returns None, after logging a warning that mentions
+        warn_name, if href is not a file:// URL or the file is not in the
+        container. '''
+        prefix = 'file://'
+        if not href.startswith(prefix):
+            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
+            return None
+        src = href[len(prefix):]
+        # On windows drop the slash in front of a drive letter (/C:/...)
+        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
+            src = src[1:]
+        src = unquote(src.replace('/', os.sep))
+        name = self.container.abspath_to_name(src)
+        if self.container.has_name(name):
+            return name
+        self.log.warn('Missing resource', href, 'in', warn_name,
+                      'ignoring')
+        return None
+
    def collect_font_stats(self):
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
@ -220,19 +237,7 @@ class StatsCollector(object):
            if not src: continue
            style = parseStyle('background-image:%s'%src, validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
-            if not src.startswith('file://'):
-                self.log.warn('Unknown URI in @font-face: %r'%src)
-                continue
-            src = src[len('file://'):]
-            if iswindows and src.startswith('/'):
-                src = src[1:]
-            src = src.replace('/', os.sep)
-            src = unquote(src)
-            name = self.container.abspath_to_name(src)
-            if not self.container.has_name(name):
-                self.log.warn('Font %r referenced in @font-face rule not found'
-                              %name)
-                continue
+            name = self.href_to_name(src, '@font-face rule')
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
--- a/src/calibre/ebooks/oeb/polish/subset.py
+++ b/src/calibre/ebooks/oeb/polish/subset.py
@ -54,7 +54,10 @@ def subset_all_fonts(container, font_stats, report):
                olen = sum(old_sizes.itervalues())
                nlen = sum(new_sizes.itervalues())
                total_new += len(nraw)
-                report('Decreased the font %s to %.1f%% of its original size'%
+                if nlen == olen:
+                    report('The font %s was already subset'%font_name)
+                else:
+                    report('Decreased the font %s to %.1f%% of its original size'%
                       (font_name, nlen/olen * 100))
                f.seek(0), f.truncate(), f.write(nraw)

--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -363,7 +363,10 @@ class CSSFlattener(object):
            cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium

        fsize = font_size
-        if not self.context.disable_font_rescaling:
+        is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in
+                       cssdict and len(node) == 0 and node.text and
+                       len(node.text) == 1)
+        if not self.context.disable_font_rescaling and not is_drop_cap:
            _sbase = self.sbase if self.sbase is not None else \
                self.context.source.fbase
            dyn_rescale = dynamic_rescale_factor(node)
@ -382,7 +385,7 @@ class CSSFlattener(object):

        try:
            minlh = self.context.minimum_line_height / 100.
-            if style['line-height'] < minlh * fsize:
+            if not is_drop_cap and style['line-height'] < minlh * fsize:
                cssdict['line-height'] = str(minlh)
        except:
            self.oeb.logger.exception('Failed to set minimum line-height')
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@ -146,7 +146,7 @@ class MergeMetadata(object):
                    return item.id
                self.remove_old_cover(item)
            elif not cdata:
-                id = self.oeb.manifest.generate(id='cover')
+                id = self.oeb.manifest.generate(id='cover')[0]
                self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
                return id
        if cdata:
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -117,8 +117,7 @@ class Split(object):
                continue

        page_breaks = list(page_breaks)
-        page_breaks.sort(cmp=
-              lambda x,y : cmp(int(x.get('pb_order')), int(y.get('pb_order'))))
+        page_breaks.sort(key=lambda x:int(x.get('pb_order')))
        page_break_ids, page_breaks_ = [], []
        for i, x in enumerate(page_breaks):
            x.set('id', x.get('id', 'calibre_pb_%d'%i))
@ -235,7 +234,8 @@ class FlowSplitter(object):
        for pattern, before in ordered_ids:
            elem = pattern(tree)
            if elem:
-                self.log.debug('\t\tSplitting on page-break')
+                self.log.debug('\t\tSplitting on page-break at %s'%
+                               elem[0].get('id'))
                before, after = self.do_split(tree, elem[0], before)
                self.trees.append(before)
                tree = after
@ -292,14 +292,9 @@ class FlowSplitter(object):

        return npath

-
-
    def do_split(self, tree, split_point, before):
        '''
-        Split ``tree`` into a *before* and *after* tree at ``split_point``,
-        preserving tag structure, but not duplicating any text.
-        All tags that have had their text and tail
-        removed have the attribute ``calibre_split`` set to 1.
+        Split ``tree`` into a *before* and *after* tree at ``split_point``.

        :param before: If True tree is split before split_point, otherwise after split_point
        :return: before_tree, after_tree
@ -315,8 +310,9 @@ class FlowSplitter(object):


        def nix_element(elem, top=True):
+            # Remove elem unless top is False in which case replace elem by its
+            # children
            parent = elem.getparent()
-            index = parent.index(elem)
            if top:
                parent.remove(elem)
            else:
@ -325,27 +321,38 @@ class FlowSplitter(object):

        # Tree 1
        hit_split_point = False
-        for elem in list(body.iterdescendants()):
+        keep_descendants = False
+        split_point_descendants = frozenset(split_point.iterdescendants())
+        for elem in tuple(body.iterdescendants()):
            if elem is split_point:
                hit_split_point = True
                if before:
                    nix_element(elem)
+                else:
+                    # We want to keep the descendants of the split point in
+                    # Tree 1
+                    keep_descendants = True

                continue
            if hit_split_point:
+                if keep_descendants:
+                    if elem in split_point_descendants:
+                        # elem is a descendant keep it
+                        continue
+                    else:
+                        # We are out of split_point, so prevent further set
+                        # lookups of split_point_descendants
+                        keep_descendants = False
                nix_element(elem)

-
        # Tree 2
-        hit_split_point = False
-        for elem in list(body2.iterdescendants()):
+        for elem in tuple(body2.iterdescendants()):
            if elem is split_point2:
-                hit_split_point = True
                if not before:
-                    nix_element(elem, top=False)
-                continue
-            if not hit_split_point:
-                nix_element(elem, top=False)
+                    nix_element(elem)
+                break
+            nix_element(elem, top=False)
+
        body2.text = '\n'

        return tree, tree2
@ -478,8 +485,7 @@ class FlowSplitter(object):

    def commit(self):
        '''
-        Commit all changes caused by the split. This removes the previously
-        introduced ``calibre_split`` attribute and calculates an *anchor_map* for
+        Commit all changes caused by the split. Calculates an *anchor_map* for
        all anchors in the original tree. Internal links are re-directed. The
        original file is deleted and the split files are saved.
        '''
--- a/src/calibre/ebooks/pdf/render/from_html.py
+++ b/src/calibre/ebooks/pdf/render/from_html.py
@ -16,6 +16,7 @@ from PyQt4.Qt import (QObject, QPainter, Qt, QSize, QString, QTimer,
 from PyQt4.QtWebKit import QWebView, QWebPage, QWebSettings

 from calibre import fit_image
+from calibre.constants import iswindows
 from calibre.ebooks.oeb.display.webview import load_html
 from calibre.ebooks.pdf.render.common import (inch, cm, mm, pica, cicero,
                                              didot, PAPER_SIZES)
@ -251,16 +252,33 @@ class PDFWriter(QObject):
    def current_page_num(self):
        return self.doc.current_page_num

+    def load_mathjax(self):  # Set mathjax.base, check the page for math and, if present, wait until mathjax.math_loaded is true
+        evaljs = self.view.page().mainFrame().evaluateJavaScript
+        mjpath = P(u'viewer/mathjax').replace(os.sep, '/')  # P(...) result with os.sep replaced by '/'
+        if iswindows:
+            mjpath = u'/' + mjpath  # Windows only: prefix a leading slash
+        if evaljs('''
+                    window.mathjax.base = %s;
+                    mathjax.check_for_math(); mathjax.math_present
+                    '''%(json.dumps(mjpath, ensure_ascii=False))).toBool():  # path is JSON-encoded into the script; result is math_present
+            self.log.debug('Math present, loading MathJax')
+            while not evaljs('mathjax.math_loaded').toBool():  # poll the JS flag; no timeout is applied
+                self.loop.processEvents(self.loop.ExcludeUserInputEvents)  # keep processing events, excluding user input
+            evaljs('document.getElementById("MathJax_Message").style.display="none";')  # hide the MathJax_Message element
+
    def do_paged_render(self):
        if self.paged_js is None:
-            from calibre.utils.resources import compiled_coffeescript
-            self.paged_js =  compiled_coffeescript('ebooks.oeb.display.utils')
-            self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing')
-            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')
+            from calibre.utils.resources import compiled_coffeescript as cc
+            self.paged_js =  cc('ebooks.oeb.display.utils')
+            self.paged_js += cc('ebooks.oeb.display.indexing')
+            self.paged_js += cc('ebooks.oeb.display.paged')
+            self.paged_js += cc('ebooks.oeb.display.mathjax')

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
+        self.load_mathjax()
+
        evaljs('''
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
--- a/src/calibre/gui2/actions/polish.py
+++ b/src/calibre/gui2/actions/polish.py
@ -7,35 +7,52 @@ __license__   = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, weakref, shutil
+import os, weakref, shutil, textwrap
 from collections import OrderedDict
+from functools import partial

 from PyQt4.Qt import (QDialog, QGridLayout, QIcon, QCheckBox, QLabel, QFrame,
                      QApplication, QDialogButtonBox, Qt, QSize, QSpacerItem,
-                      QSizePolicy, QTimer)
+                      QSizePolicy, QTimer, QModelIndex, QTextEdit,
+                      QInputDialog, QMenu)

-from calibre.gui2 import error_dialog, Dispatcher
+from calibre.gui2 import error_dialog, Dispatcher, gprefs
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2.convert.metadata import create_opf_file
 from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.config_base import tweaks

-
-class Polish(QDialog):
+class Polish(QDialog): # {{{

    def __init__(self, db, book_id_map, parent=None):
        from calibre.ebooks.oeb.polish.main import HELP
        QDialog.__init__(self, parent)
        self.db, self.book_id_map = weakref.ref(db), book_id_map
        self.setWindowIcon(QIcon(I('polish.png')))
-        self.setWindowTitle(ngettext(
-            'Polish book', _('Polish %d books')%len(book_id_map), len(book_id_map)))
+        title = _('Polish book')
+        if len(book_id_map) > 1:
+            title = _('Polish %d books')%len(book_id_map)
+        self.setWindowTitle(title)

        self.help_text = {
            'polish': _('<h3>About Polishing books</h3>%s')%HELP['about'],

            'subset':_('<h3>Subsetting fonts</h3>%s')%HELP['subset'],
+
+            'smarten_punctuation':
+            _('<h3>Smarten punctuation</h3>%s')%HELP['smarten_punctuation'],
+
+            'metadata':_('<h3>Updating metadata</h3>'
+                         '<p>This will update all metadata and covers in the'
+                         ' ebook files to match the current metadata in the'
+                         ' calibre library.</p><p>If the ebook file does not have'
+                         ' an identifiable cover, a new cover is inserted.</p>'
+                         ' <p>Note that most ebook'
+                         ' formats are not capable of supporting all the'
+                         ' metadata in calibre.</p>'),
+            'jacket':_('<h3>Book Jacket</h3>%s')%HELP['jacket'],
+            'remove_jacket':_('<h3>Remove Book Jacket</h3>%s')%HELP['remove_jacket'],
        }

        self.l = l = QGridLayout()
@ -45,12 +62,19 @@ class Polish(QDialog):
        l.addWidget(la, 0, 0, 1, 2)

        count = 0
-        self.actions = OrderedDict([
+        self.all_actions = OrderedDict([
            ('subset', _('Subset all embedded fonts')),
+            ('smarten_punctuation', _('Smarten punctuation')),
+            ('metadata', _('Update metadata in book files')),
+            ('jacket', _('Add metadata as a "book jacket" page')),
+            ('remove_jacket', _('Remove a previously inserted book jacket')),
        ])
-        for name, text in self.actions.iteritems():
+        prefs = gprefs.get('polishing_settings', {})
+        for name, text in self.all_actions.iteritems():
            count += 1
            x = QCheckBox(text, self)
+            x.setChecked(prefs.get(name, False))
+            x.stateChanged.connect(partial(self.option_toggled, name))
            l.addWidget(x, count, 0, 1, 1)
            setattr(self, 'opt_'+name, x)
            la = QLabel(' <a href="#%s">%s</a>'%(name, _('About')))
@ -72,28 +96,106 @@ class Polish(QDialog):
        l.addWidget(la, 0, 2, count+1, 1)
        l.setColumnStretch(2, 1)

+        self.show_reports = sr = QCheckBox(_('Show &report'), self)
+        sr.setChecked(gprefs.get('polish_show_reports', True))
+        sr.setToolTip(textwrap.fill(_('Show a report of all the actions performed'
+                        ' after polishing is completed')))
+        l.addWidget(sr, count+1, 0, 1, 1)
        self.bb = bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel)
        bb.accepted.connect(self.accept)
        bb.rejected.connect(self.reject)
-        l.addWidget(bb, count+1, 0, 1, -1)
+        self.save_button = sb = bb.addButton(_('&Save Settings'), bb.ActionRole)
+        sb.clicked.connect(self.save_settings)
+        self.load_button = lb = bb.addButton(_('&Load Settings'), bb.ActionRole)
+        self.load_menu = QMenu(lb)
+        lb.setMenu(self.load_menu)
+        self.all_button = b = bb.addButton(_('Select &all'), bb.ActionRole)
+        b.clicked.connect(partial(self.select_all, True))
+        self.none_button = b = bb.addButton(_('Select &none'), bb.ActionRole)
+        b.clicked.connect(partial(self.select_all, False))
+        l.addWidget(bb, count+1, 1, 1, -1)
+        self.setup_load_button()

-        self.resize(QSize(800, 600))
+        self.resize(QSize(950, 600))
+
+    def select_all(self, enable):  # Check (enable=True) or uncheck (enable=False) every action checkbox
+        for action in self.all_actions:
+            x = getattr(self, 'opt_'+action)  # checkboxes are stored as opt_<action name>
+            x.blockSignals(True)  # set the state without emitting signals such as stateChanged
+            x.setChecked(enable)
+            x.blockSignals(False)
+
+    def save_settings(self):  # Store the current checkbox states in gprefs 'polish_settings' under a user-chosen name
+        if not self.something_selected:  # refuse to save an empty selection
+            return error_dialog(self, _('No actions selected'),
+                _('You must select at least one action before saving'),
+                                show=True)
+        name, ok = QInputDialog.getText(self, _('Choose name'),
+                _('Choose a name for these settings'))
+        if ok:  # second value of getText; false when the prompt was not confirmed
+            name = unicode(name).strip()
+            if name:  # a blank name saves nothing
+                settings = {ac:getattr(self, 'opt_'+ac).isChecked() for ac in
+                            self.all_actions}
+                saved = gprefs.get('polish_settings', {})
+                saved[name] = settings  # an existing entry with the same name is overwritten
+                gprefs.set('polish_settings', saved)
+                self.setup_load_button()  # rebuild the load menu so the new entry appears
+
+    def setup_load_button(self):  # Rebuild the load menu from the presets stored in gprefs 'polish_settings'
+        saved = gprefs.get('polish_settings', {})
+        m = self.load_menu
+        m.clear()
+        self.__actions = []  # holds the objects returned by addAction (presumably to keep them referenced)
+        a = self.__actions.append
+        for name in sorted(saved):
+            a(m.addAction(name, partial(self.load_settings, name)))  # one menu entry per preset, in sorted name order
+        m.addSeparator()
+        a(m.addAction(_('Remove saved settings'), self.clear_settings))
+        self.load_button.setEnabled(bool(saved))  # button is disabled when nothing has been saved
+
+    def clear_settings(self):  # Replace all saved presets with an empty dict, then refresh the load menu
+        gprefs.set('polish_settings', {})
+        self.setup_load_button()
+
+    def load_settings(self, name):  # Apply the preset saved under `name` to the action checkboxes
+        saved = gprefs.get('polish_settings', {}).get(name, {})  # unknown name -> empty preset
+        for action in self.all_actions:
+            checked = saved.get(action, False)  # actions absent from the preset end up unchecked
+            x = getattr(self, 'opt_'+action)
+            x.blockSignals(True)  # set the state without emitting signals such as stateChanged
+            x.setChecked(checked)
+            x.blockSignals(False)
+
+    def option_toggled(self, name, state):  # stateChanged handler (name bound via partial): show an option's help when it is checked
+        if state == Qt.Checked:  # unchecking leaves the help panel unchanged
+            self.help_label.setText(self.help_text[name])

    def help_link_activated(self, link):  # Show the help text keyed by the link target
        link = unicode(link)[1:]  # drop the first character; the "About" links are built as "#<name>"
        self.help_label.setText(self.help_text[link])

+    @property
+    def something_selected(self):  # True if at least one action checkbox is checked
+        for action in self.all_actions:
+            if getattr(self, 'opt_'+action).isChecked():
+                return True
+        return False
+
    def accept(self):  # Collect the chosen actions, persist them, queue the files and close the dialog
        self.actions = ac = {}  # result dict: action name -> bool
+        saved_prefs = {}
+        gprefs['polish_show_reports'] = bool(self.show_reports.isChecked())  # stored before validation, so it is saved even if nothing is selected
        something = False
-        for action in self.actions:
-            ac[action] = bool(getattr(self, 'opt_'+action).isChecked())
+        for action in self.all_actions:  # iterate the full action table; self.actions was just rebound to the empty dict
+            ac[action] = saved_prefs[action] = bool(getattr(self, 'opt_'+action).isChecked())
            if ac[action]:
                something = True
        if not something:  # nothing checked: show an error and keep the dialog open
            return error_dialog(self, _('No actions selected'),
                _('You must select at least one action, or click Cancel.'),
                                show=True)
+        gprefs['polishing_settings'] = saved_prefs  # last used selection; read back in __init__ via gprefs.get('polishing_settings')
        self.queue_files()
        return super(Polish, self).accept()

@ -123,6 +225,7 @@ class Polish(QDialog):
            self.do_book(num, book_id, self.book_id_map[book_id])
        except:
            self.pd.reject()
+            raise
        else:
            self.pd.set_value(num)
            QTimer.singleShot(0, self.do_one)
@ -148,13 +251,107 @@ class Polish(QDialog):

        desc = ngettext(_('Polish %s')%mi.title,
                        _('Polish book %(nums)s of %(tot)s (%(title)s)')%dict(
-                            num=num, tot=len(self.book_id_map),
+                            nums=num, tot=len(self.book_id_map),
                            title=mi.title), len(self.book_id_map))
        if hasattr(self, 'pd'):
            self.pd.set_msg(_('Queueing book %(nums)s of %(tot)s (%(title)s)')%dict(
-                            num=num, tot=len(self.book_id_map), title=mi.title))
+                            nums=num, tot=len(self.book_id_map), title=mi.title))

        self.jobs.append((desc, data, book_id, base))
+# }}}
+
+class Report(QDialog): # {{{
+
+    def __init__(self, parent):  # Build the report view, backup label, ignore checkbox and button box; parent is kept as self.gui
+        QDialog.__init__(self, parent)
+        self.gui = parent
+        self.setAttribute(Qt.WA_DeleteOnClose, False)  # do not delete the dialog on close
+        self.setWindowIcon(QIcon(I('polish.png')))
+        self.reports = []  # queue of pending report argument tuples
+
+        self.l = l = QGridLayout()
+        self.setLayout(l)
+        self.view = v = QTextEdit(self)
+        v.setReadOnly(True)
+        l.addWidget(self.view, 0, 0, 1, 2)
+
+        self.backup_msg = la = QLabel('')  # text is filled in by show_report when fmts is non-empty
+        l.addWidget(la, 1, 0, 1, 2)
+        la.setVisible(False)
+        la.setWordWrap(True)
+
+        self.ign_msg = _('Ignore remaining %d reports')  # template; %d is filled in by setup_ign
+        self.ign = QCheckBox(self.ign_msg, self)  # created with the unformatted template; setup_ign sets the real text
+        l.addWidget(self.ign, 2, 0)
+
+        bb = self.bb = QDialogButtonBox(QDialogButtonBox.Close)
+        bb.accepted.connect(self.accept)
+        bb.rejected.connect(self.reject)
+        b = self.log_button = bb.addButton(_('View full &log'), bb.ActionRole)
+        b.clicked.connect(self.view_log)
+        bb.button(bb.Close).setDefault(True)
+        l.addWidget(bb, 2, 1)
+
+        self.finished.connect(self.show_next, type=Qt.QueuedConnection)  # queued: after the dialog finishes, try the next pending report
+
+        self.resize(QSize(800, 600))
+
+    def setup_ign(self):  # Refresh the ignore checkbox: text shows the pending count, hidden when none, always unchecked
+        self.ign.setText(self.ign_msg%len(self.reports))
+        self.ign.setVisible(bool(self.reports))
+        self.ign.setChecked(False)
+
+    def __call__(self, *args):  # Queue a report (args are later unpacked into show_report); show it at once if the dialog is hidden
+        self.reports.append(args)
+        self.setup_ign()
+        if not self.isVisible():
+            self.show_next()
+
+    def show_report(self, book_title, book_id, fmts, job, report):  # Display one report; book_id is accepted but not used here
+        from calibre.ebooks.markdown.markdown import markdown
+        self.current_log = job.details  # kept for view_log
+        self.setWindowTitle(_('Polishing of %s')%book_title)
+        self.view.setText(markdown('# %s\n\n'%book_title + report,
+                                   output_format='html4'))  # title heading + report text, converted by markdown()
+        self.bb.button(self.bb.Close).setFocus(Qt.OtherFocusReason)
+        self.backup_msg.setVisible(bool(fmts))  # backup note is shown only when fmts is non-empty
+        if fmts:
+            m = ngettext('The original file has been saved as %s.',
+                     'The original files have been saved as %s.', len(fmts))%(
+                _(' and ').join('ORIGINAL_'+f for f in fmts)
+                     )
+            self.backup_msg.setText(m + ' ' + _(
+                'If you polish again, the polishing will run on the originals.')%(
+                ))  # the trailing %() formats with an empty tuple, i.e. no substitutions
+
+    def view_log(self):  # Replace the report with the plain-text job log and scroll to the top
+        self.view.setPlainText(self.current_log)
+        self.view.verticalScrollBar().setValue(0)
+
+    def show_next(self, *args):  # Pop the oldest pending report and display it; *args absorbs any signal arguments
+        if not self.reports:
+            return
+        if not self.isVisible():
+            self.show()
+        self.show_report(*self.reports.pop(0))
+        self.setup_ign()  # update the remaining-count text after popping
+
+    def accept(self):  # Advance to the next pending report; close only when none remain
+        if self.ign.isChecked():
+            self.reports = []  # user chose to ignore the remaining reports
+        if self.reports:
+            self.show_next()
+            return
+        super(Report, self).accept()
+
+    def reject(self):  # Same flow as accept(), ending in QDialog.reject
+        if self.ign.isChecked():
+            self.reports = []  # user chose to ignore the remaining reports
+        if self.reports:
+            self.show_next()
+            return
+        super(Report, self).reject()
+# }}}

 class PolishAction(InterfaceAction):

@ -165,6 +362,7 @@ class PolishAction(InterfaceAction):

    def genesis(self):  # Connect the action's trigger and create the report dialog
        self.qaction.triggered.connect(self.polish_books)
+        self.report = Report(self.gui)  # a single Report dialog, created here with self.gui as parent

    def location_selected(self, loc):
        enabled = loc == 'library'
@ -205,21 +403,28 @@ class PolishAction(InterfaceAction):
            return
        d = Polish(self.gui.library_view.model().db, book_id_map, parent=self.gui)
        if d.exec_() == d.Accepted and d.jobs:
-            for desc, data, book_id, base, files in reversed(d.jobs):
+            show_reports = bool(d.show_reports.isChecked())
+            for desc, data, book_id, base in reversed(d.jobs):
                job = self.gui.job_manager.run_job(
                    Dispatcher(self.book_polished), 'gui_polish', args=(data,),
                    description=desc)
-                job.polish_args = (book_id, base, data['files'])
+                job.polish_args = (book_id, base, data['files'], show_reports)
+            if d.jobs:
+                self.gui.jobs_pointer.start()
+                self.gui.status_bar.show_message(
+                    _('Start polishing of %d book(s)') % len(d.jobs), 2000)

    def book_polished(self, job):
        if job.failed:
            self.gui.job_exception(job)
            return
        db = self.gui.current_db
-        book_id, base, files = job.polish_args
+        book_id, base, files, show_reports = job.polish_args
+        fmts = set()
        for path in files:
            fmt = path.rpartition('.')[-1].upper()
-            if tweaks['save_original_format']:
+            if tweaks['save_original_format_when_polishing']:
+                fmts.add(fmt)
                db.save_original_format(book_id, fmt, notify=False)
            with open(path, 'rb') as f:
                db.add_format(book_id, fmt, f, index_is_id=True)
@ -231,6 +436,13 @@ class PolishAction(InterfaceAction):
            os.rmdir(parent)
        except:
            pass
+        self.gui.tags_view.recount()
+        if self.gui.current_view() is self.gui.library_view:
+            current = self.gui.library_view.currentIndex()
+            if current.isValid():
+                self.gui.library_view.model().current_changed(current, QModelIndex())
+        if show_reports:
+            self.report(db.title(book_id, index_is_id=True), book_id, fmts, job, job.result)

 if __name__ == '__main__':
    app = QApplication([])
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@ -512,7 +512,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
        self.test_text.editTextChanged[str].connect(self.s_r_paint_results)
        self.comma_separated.stateChanged.connect(self.s_r_paint_results)
        self.case_sensitive.stateChanged.connect(self.s_r_paint_results)
-        self.s_r_src_ident.currentIndexChanged[int].connect(self.s_r_paint_results)
+        self.s_r_src_ident.currentIndexChanged[int].connect(self.s_r_identifier_type_changed)
        self.s_r_dst_ident.textChanged.connect(self.s_r_paint_results)
        self.s_r_template.lost_focus.connect(self.s_r_template_changed)
        self.central_widget.setCurrentIndex(0)
@ -576,9 +576,9 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
            elif not fm['is_multiple']:
                val = [val]
            elif fm['datatype'] == 'composite':
-                val = [v.strip() for v in val.split(fm['is_multiple']['ui_to_list'])]
+                val = [v2.strip() for v2 in val.split(fm['is_multiple']['ui_to_list'])]
            elif field == 'authors':
-                val = [v.replace('|', ',') for v in val]
+                val = [v2.replace('|', ',') for v2 in val]
        else:
            val = []
        if not val:
@ -591,6 +591,10 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
    def s_r_template_changed(self):  # Re-run the search-field handler for the currently selected field
        self.s_r_search_field_changed(self.search_field.currentIndex())

+    def s_r_identifier_type_changed(self, idx):  # Slot for s_r_src_ident.currentIndexChanged[int]
+        self.s_r_search_field_changed(self.search_field.currentIndex())  # re-run the search-field handler first
+        self.s_r_paint_results(idx)  # then repaint, as the previous direct connection did
+
    def s_r_search_field_changed(self, idx):
        self.s_r_template.setVisible(False)
        self.template_label.setVisible(False)
--- a/Show More
+++ b/Show More